Skip to content

Commit e05eb87

Browse files
committed
Add support for the ENUM8 and ENUM16 types to the ClickHouse dialect
For the documentation, please refer to: https://clickhouse.com/docs/en/sql-reference/data-types/enum
1 parent c761f0b commit e05eb87

File tree

8 files changed

+188
-46
lines changed

8 files changed

+188
-46
lines changed

src/ast/data_type.rs

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,21 @@ use serde::{Deserialize, Serialize};
2525
#[cfg(feature = "visitor")]
2626
use sqlparser_derive::{Visit, VisitMut};
2727

28-
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
28+
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField, Value};
2929

3030
use super::{value::escape_single_quote_string, ColumnDef};
3131

32+
/// A single member in the value list of an `ENUM` data type.
///
/// `DataType`'s `Display` impl renders [`EnumValue::String`] as `'name'` and
/// [`EnumValue::Pair`] as `'name' = value`, escaping single quotes in the name.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
33+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
34+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
35+
pub enum EnumValue {
36+
/// A member given only by name, e.g. `'a'`.
String(String),
37+
/// A member given by name together with an explicit value, e.g. `'a' = 1`.
///
/// ClickHouse allows specifying an integer value for each enum value.
38+
///
39+
/// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
40+
Pair(String, Value),
41+
}
42+
3243
/// SQL data types
3344
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
3445
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -334,7 +345,7 @@ pub enum DataType {
334345
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
335346
Nested(Vec<ColumnDef>),
336347
/// Enums
337-
Enum(Vec<String>),
348+
Enum(Vec<EnumValue>, Option<i64>),
338349
/// Set
339350
Set(Vec<String>),
340351
/// Struct
@@ -546,13 +557,22 @@ impl fmt::Display for DataType {
546557
write!(f, "{}({})", ty, modifiers.join(", "))
547558
}
548559
}
549-
DataType::Enum(vals) => {
550-
write!(f, "ENUM(")?;
560+
DataType::Enum(vals, bits) => {
561+
match bits {
562+
Some(bits) => write!(f, "ENUM{}", bits),
563+
None => write!(f, "ENUM"),
564+
}?;
565+
write!(f, "(")?;
551566
for (i, v) in vals.iter().enumerate() {
552567
if i != 0 {
553568
write!(f, ", ")?;
554569
}
555-
write!(f, "'{}'", escape_single_quote_string(v))?;
570+
match v {
571+
EnumValue::String(v) => write!(f, "'{}'", escape_single_quote_string(v))?,
572+
EnumValue::Pair(v, i) => {
573+
write!(f, "'{}' = {}", escape_single_quote_string(v), i)?
574+
}
575+
}
556576
}
557577
write!(f, ")")
558578
}

src/ast/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use sqlparser_derive::{Visit, VisitMut};
4040
use crate::tokenizer::Span;
4141

4242
pub use self::data_type::{
43-
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
43+
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumValue, ExactNumberInfo,
4444
StructBracketKind, TimezoneInfo,
4545
};
4646
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue, Use};

src/dialect/clickhouse.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,10 @@ impl Dialect for ClickHouseDialect {
5050
fn supports_limit_comma(&self) -> bool {
5151
true
5252
}
53+
54+
/// Always returns `true`: ClickHouse supports the `Enum8` and `Enum16` types.
55+
/// See [ClickHouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
56+
fn supports_enum_type_with_bits(&self) -> bool {
57+
true
58+
}
5359
}

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,11 @@ pub trait Dialect: Debug + Any {
707707
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
708708
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
709709
}
710+
711+
/// Returns true if the dialect supports enum types that carry a bit width
/// in the type name, such as `Enum8` and `Enum16`.
///
/// The default implementation returns `false`.
// NOTE(review): the `Keyword::ENUM8` / `Keyword::ENUM16` arms of the data-type
// parser do not appear to consult this flag — confirm whether they should be
// gated on it or whether this method is currently informational only.
712+
fn supports_enum_type_with_bits(&self) -> bool {
713+
false
714+
}
710715
}
711716

712717
/// This represents the operators for which precedence must be defined

src/keywords.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,8 @@ define_keywords!(
286286
ENFORCED,
287287
ENGINE,
288288
ENUM,
289+
ENUM16,
290+
ENUM8,
289291
EPHEMERAL,
290292
EPOCH,
291293
EQUALS,

src/parser/mod.rs

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,18 +1049,18 @@ impl<'a> Parser<'a> {
10491049
| Keyword::CURRENT_USER
10501050
| Keyword::SESSION_USER
10511051
| Keyword::USER
1052-
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
1053-
{
1054-
Ok(Some(Expr::Function(Function {
1055-
name: ObjectName(vec![w.to_ident(w_span)]),
1056-
parameters: FunctionArguments::None,
1057-
args: FunctionArguments::None,
1058-
null_treatment: None,
1059-
filter: None,
1060-
over: None,
1061-
within_group: vec![],
1062-
})))
1063-
}
1052+
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
1053+
{
1054+
Ok(Some(Expr::Function(Function {
1055+
name: ObjectName(vec![w.to_ident(w_span)]),
1056+
parameters: FunctionArguments::None,
1057+
args: FunctionArguments::None,
1058+
null_treatment: None,
1059+
filter: None,
1060+
over: None,
1061+
within_group: vec![],
1062+
})))
1063+
}
10641064
Keyword::CURRENT_TIMESTAMP
10651065
| Keyword::CURRENT_TIME
10661066
| Keyword::CURRENT_DATE
@@ -1075,18 +1075,18 @@ impl<'a> Parser<'a> {
10751075
Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
10761076
Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
10771077
Keyword::EXISTS
1078-
// Support parsing Databricks has a function named `exists`.
1079-
if !dialect_of!(self is DatabricksDialect)
1080-
|| matches!(
1078+
// Databricks has a function named `exists`; for that dialect, only treat EXISTS as a subquery expression when a SELECT or WITH keyword is seen ahead.
1079+
if !dialect_of!(self is DatabricksDialect)
1080+
|| matches!(
10811081
self.peek_nth_token(1).token,
10821082
Token::Word(Word {
10831083
keyword: Keyword::SELECT | Keyword::WITH,
10841084
..
10851085
})
10861086
) =>
1087-
{
1088-
Ok(Some(self.parse_exists_expr(false)?))
1089-
}
1087+
{
1088+
Ok(Some(self.parse_exists_expr(false)?))
1089+
}
10901090
Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)),
10911091
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
10921092
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
@@ -1103,22 +1103,22 @@ impl<'a> Parser<'a> {
11031103
Ok(Some(self.parse_array_expr(true)?))
11041104
}
11051105
Keyword::ARRAY
1106-
if self.peek_token() == Token::LParen
1107-
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
1108-
{
1109-
self.expect_token(&Token::LParen)?;
1110-
let query = self.parse_query()?;
1111-
self.expect_token(&Token::RParen)?;
1112-
Ok(Some(Expr::Function(Function {
1113-
name: ObjectName(vec![w.to_ident(w_span)]),
1114-
parameters: FunctionArguments::None,
1115-
args: FunctionArguments::Subquery(query),
1116-
filter: None,
1117-
null_treatment: None,
1118-
over: None,
1119-
within_group: vec![],
1120-
})))
1121-
}
1106+
if self.peek_token() == Token::LParen
1107+
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
1108+
{
1109+
self.expect_token(&Token::LParen)?;
1110+
let query = self.parse_query()?;
1111+
self.expect_token(&Token::RParen)?;
1112+
Ok(Some(Expr::Function(Function {
1113+
name: ObjectName(vec![w.to_ident(w_span)]),
1114+
parameters: FunctionArguments::None,
1115+
args: FunctionArguments::Subquery(query),
1116+
filter: None,
1117+
null_treatment: None,
1118+
over: None,
1119+
within_group: vec![],
1120+
})))
1121+
}
11221122
Keyword::NOT => Ok(Some(self.parse_not()?)),
11231123
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
11241124
Ok(Some(self.parse_match_against()?))
@@ -5023,7 +5023,7 @@ impl<'a> Parser<'a> {
50235023
return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}")))
50245024
}
50255025
}
5026-
},
5026+
}
50275027
};
50285028
Ok(owner)
50295029
}
@@ -7997,6 +7997,27 @@ impl<'a> Parser<'a> {
79977997
}
79987998
}
79997999

8000+
/// Parse a parenthesized, comma-separated list of enum members, e.g.
/// `('a', 'b')` or `('a' = 1, 'b' = 2)`.
///
/// Expects the next token to be the opening `(` and consumes through the
/// closing `)`. Each member is parsed by [`Parser::parse_enum_value`].
///
/// # Errors
///
/// Returns a `ParserError` if either parenthesis is missing or if any
/// member fails to parse.
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumValue>, ParserError> {
8001+
self.expect_token(&Token::LParen)?;
8002+
let values = self.parse_comma_separated(Parser::parse_enum_value)?;
8003+
self.expect_token(&Token::RParen)?;
8004+
Ok(values)
8005+
}
8006+
8007+
pub fn parse_enum_value(&mut self) -> Result<EnumValue, ParserError> {
8008+
let str = self.parse_literal_string()?;
8009+
let value = match self.peek_token().token {
8010+
Token::Eq => {
8011+
// Consume the `=` token
8012+
self.next_token();
8013+
let value = self.parse_number_value()?;
8014+
EnumValue::Pair(str, value)
8015+
}
8016+
_ => EnumValue::String(str),
8017+
};
8018+
Ok(value)
8019+
}
8020+
80008021
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
80018022
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
80028023
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@@ -8235,7 +8256,9 @@ impl<'a> Parser<'a> {
82358256
Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
82368257
self.parse_exact_number_optional_precision_scale()?,
82378258
)),
8238-
Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)),
8259+
Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)),
8260+
Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))),
8261+
Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
82398262
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
82408263
Keyword::ARRAY => {
82418264
if dialect_of!(self is SnowflakeDialect) {

tests/sqlparser_clickhouse.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,86 @@ fn parse_explain_table() {
16211621
}
16221622
}
16231623

1624+
// Checks that `ENUM8`, `ENUM16` and plain `ENUM` column types parse into
// `DataType::Enum` with the expected members and bit width (`Some(8)`,
// `Some(16)`, `None`), and that malformed member lists are rejected.
#[test]
1625+
fn parse_create_table_with_enum_types() {
1626+
let sql = "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = 2), bar ENUM16('a' = 1, 'b' = 2), baz ENUM('a', 'b'))";
1627+
match clickhouse().verified_stmt(sql) {
1628+
Statement::CreateTable(CreateTable { name, columns, .. }) => {
1629+
assert_eq!(name.to_string(), "t0");
1630+
assert_eq!(
1631+
vec![
1632+
// `foo ENUM8('a' = 1, 'b' = 2)`
ColumnDef {
1633+
name: Ident::new("foo"),
1634+
data_type: DataType::Enum(
1635+
vec![
1636+
EnumValue::Pair(
1637+
"a".to_string(),
1638+
Number("1".parse().unwrap(), false)
1639+
),
1640+
EnumValue::Pair(
1641+
"b".to_string(),
1642+
Number("2".parse().unwrap(), false)
1643+
)
1644+
],
1645+
Some(8)
1646+
),
1647+
collation: None,
1648+
options: vec![],
1649+
},
1650+
// `bar ENUM16('a' = 1, 'b' = 2)`
ColumnDef {
1651+
name: Ident::new("bar"),
1652+
data_type: DataType::Enum(
1653+
vec![
1654+
EnumValue::Pair(
1655+
"a".to_string(),
1656+
Number("1".parse().unwrap(), false)
1657+
),
1658+
EnumValue::Pair(
1659+
"b".to_string(),
1660+
Number("2".parse().unwrap(), false)
1661+
)
1662+
],
1663+
Some(16)
1664+
),
1665+
collation: None,
1666+
options: vec![],
1667+
},
1668+
// `baz ENUM('a', 'b')` — no bit width, members without explicit values
ColumnDef {
1669+
name: Ident::new("baz"),
1670+
data_type: DataType::Enum(
1671+
vec![
1672+
EnumValue::String("a".to_string()),
1673+
EnumValue::String("b".to_string())
1674+
],
1675+
None
1676+
),
1677+
collation: None,
1678+
options: vec![],
1679+
}
1680+
],
1681+
columns
1682+
);
1683+
}
1684+
_ => unreachable!(),
1685+
}
1686+
1687+
// Invalid: the `=` of an enum pair is not followed by a value.
1688+
assert_eq!(
1689+
clickhouse_and_generic()
1690+
.parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = ))")
1691+
.unwrap_err(),
1692+
ParserError("Expected: a value, found: )".to_string())
1693+
);
1694+
1695+
// Invalid: an enum member must start with a string literal, not a number.
1696+
assert_eq!(
1697+
clickhouse_and_generic()
1698+
.parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 2))")
1699+
.unwrap_err(),
1700+
ParserError("Expected: literal string, found: 2".to_string())
1701+
);
1702+
}
1703+
16241704
/// Builds a `TestedDialects` containing only `ClickHouseDialect`.
fn clickhouse() -> TestedDialects {
16251705
TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
16261706
}

tests/sqlparser_mysql.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ fn table_constraint_unique_primary_ctor(
685685
#[test]
686686
fn parse_create_table_primary_and_unique_key() {
687687
let sqls = ["UNIQUE KEY", "PRIMARY KEY"]
688-
.map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
688+
.map(|key_ty| format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
689689

690690
let index_type_display = [Some(KeyOrIndexDisplay::Key), None];
691691

@@ -753,7 +753,7 @@ fn parse_create_table_primary_and_unique_key() {
753753
#[test]
754754
fn parse_create_table_primary_and_unique_key_with_index_options() {
755755
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
756-
.map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
756+
.map(|key_ty| format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
757757

758758
let index_type_display = [Some(KeyOrIndexDisplay::Index), None];
759759

@@ -827,7 +827,7 @@ fn parse_create_table_primary_and_unique_key_with_index_type() {
827827
#[test]
828828
fn parse_create_table_primary_and_unique_key_characteristic_test() {
829829
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
830-
.map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
830+
.map(|key_ty| format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
831831
for sql in &sqls {
832832
mysql_and_generic().verified_stmt(sql);
833833
}
@@ -890,7 +890,13 @@ fn parse_create_table_set_enum() {
890890
},
891891
ColumnDef {
892892
name: Ident::new("baz"),
893-
data_type: DataType::Enum(vec!["a".to_string(), "b".to_string()]),
893+
data_type: DataType::Enum(
894+
vec![
895+
EnumValue::String("a".to_string()),
896+
EnumValue::String("b".to_string())
897+
],
898+
None
899+
),
894900
collation: None,
895901
options: vec![],
896902
}

0 commit comments

Comments
 (0)