From 03282a14d6f30129433dbabd59057c88c2a6fdcb Mon Sep 17 00:00:00 2001 From: "aleksei.p" Date: Fri, 31 May 2024 11:49:30 +0200 Subject: [PATCH 1/2] ClickHouse data types --- src/ast/data_type.rs | 191 ++++++++++++++++++++++++++++++++-- src/ast/mod.rs | 2 +- src/keywords.rs | 18 ++++ src/parser/mod.rs | 155 ++++++++++++++++++++++++--- tests/sqlparser_clickhouse.rs | 190 +++++++++++++++++++++++++++++++++ 5 files changed, 532 insertions(+), 24 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index d71900bff..7d0aec8fc 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -22,7 +22,7 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{display_comma_separated, ObjectName, StructField}; -use super::value::escape_single_quote_string; +use super::{value::escape_single_quote_string, ColumnDef}; /// SQL data types #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -129,10 +129,39 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Int4(Option), - /// Integer type in [bigquery] + /// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse] + /// Note: Int8 mean 8 bytes in [postgresql] (not 8 bits) + /// Int8 with optional display width e.g. 
INT8 or INT8(11) + /// Note: Int8 mean 8 bits in [clickhouse] + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int8(Option), + /// Integer type in [clickhouse] + /// Note: Int16 mean 16 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int16, + /// Integer type in [clickhouse] + /// Note: Int32 mean 32 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int32, + /// Integer type in [bigquery], [clickhouse] /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int64, + /// Integer type in [clickhouse] + /// Note: Int128 mean 128 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int128, + /// Integer type in [clickhouse] + /// Note: Int256 mean 256 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int256, /// Integer with optional display width e.g. INTEGER or INTEGER(11) Integer(Option), /// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED @@ -141,25 +170,54 @@ pub enum DataType { UnsignedInt4(Option), /// Unsigned integer with optional display width e.g. 
INTGER UNSIGNED or INTEGER(11) UNSIGNED UnsignedInteger(Option), + /// Unsigned integer type in [clickhouse] + /// Note: UInt8 mean 8 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt8, + /// Unsigned integer type in [clickhouse] + /// Note: UInt16 mean 16 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt16, + /// Unsigned integer type in [clickhouse] + /// Note: UInt32 mean 32 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt32, + /// Unsigned integer type in [clickhouse] + /// Note: UInt64 mean 64 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt64, + /// Unsigned integer type in [clickhouse] + /// Note: UInt128 mean 128 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt128, + /// Unsigned integer type in [clickhouse] + /// Note: UInt256 mean 256 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt256, /// Big integer with optional display width e.g. BIGINT or BIGINT(20) BigInt(Option), /// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED UnsignedBigInt(Option), - /// Int8 as alias for Bigint in [postgresql] - /// Note: Int8 mean 8 bytes in postgres (not 8 bits) - /// Int8 with optional display width e.g. INT8 or INT8(11) - /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html - Int8(Option), /// Unsigned Int8 with optional display width e.g. 
INT8 UNSIGNED or INT8(11) UNSIGNED UnsignedInt8(Option), /// Float4 as alias for Real in [postgresql] /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Float4, + /// Floating point in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float + Float32, /// Floating point in [bigquery] /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float Float64, /// Floating point e.g. REAL Real, @@ -182,6 +240,10 @@ pub enum DataType { Boolean, /// Date Date, + /// Date32 with the same range as Datetime64 + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32 + Date32, /// Time with optional time precision and time zone information e.g. [standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type @@ -190,6 +252,10 @@ pub enum DataType { /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html Datetime(Option), + /// Datetime with time precision and optional timezone e.g. [ClickHouse][1]. + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + Datetime64(u64, Option), /// Timestamp with optional time precision and time zone information e.g. [standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type @@ -206,12 +272,28 @@ pub enum DataType { Text, /// String with optional length. String(Option), + /// A fixed-length string e.g [ClickHouse][1]. 
+ /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring + FixedString(u64), /// Bytea Bytea, /// Custom type such as enums Custom(ObjectName, Vec), /// Arrays Array(ArrayElemTypeDef), + /// Map + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map + Map(Box, Box), + /// Tuple + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + Tuple(Vec), + /// Nested + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested + Nested(Vec), /// Enums Enum(Vec), /// Set @@ -221,6 +303,14 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec), + /// Nullable - wraps another data type so that it can also store the special marker NULL in [clickhouse]. + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable + Nullable(Box), + /// LowCardinality - changes the internal representation of other data types to be dictionary-encoded. + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality + LowCardinality(Box), /// No type specified - only used with /// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such /// as `CREATE TABLE t1 (a)`. 
@@ -296,9 +386,24 @@ impl fmt::Display for DataType { DataType::Int4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, false) } + DataType::Int8(zerofill) => { + format_type_with_optional_length(f, "INT8", zerofill, false) + } + DataType::Int16 => { + write!(f, "Int16") + } + DataType::Int32 => { + write!(f, "Int32") + } DataType::Int64 => { write!(f, "INT64") } + DataType::Int128 => { + write!(f, "Int128") + } + DataType::Int256 => { + write!(f, "Int256") + } DataType::UnsignedInt4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } @@ -314,14 +419,30 @@ impl fmt::Display for DataType { DataType::UnsignedBigInt(zerofill) => { format_type_with_optional_length(f, "BIGINT", zerofill, true) } - DataType::Int8(zerofill) => { - format_type_with_optional_length(f, "INT8", zerofill, false) - } DataType::UnsignedInt8(zerofill) => { format_type_with_optional_length(f, "INT8", zerofill, true) } + DataType::UInt8 => { + write!(f, "UInt8") + } + DataType::UInt16 => { + write!(f, "UInt16") + } + DataType::UInt32 => { + write!(f, "UInt32") + } + DataType::UInt64 => { + write!(f, "UInt64") + } + DataType::UInt128 => { + write!(f, "UInt128") + } + DataType::UInt256 => { + write!(f, "UInt256") + } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), + DataType::Float32 => write!(f, "Float32"), DataType::Float64 => write!(f, "FLOAT64"), DataType::Double => write!(f, "DOUBLE"), DataType::Float8 => write!(f, "FLOAT8"), @@ -329,6 +450,7 @@ impl fmt::Display for DataType { DataType::Bool => write!(f, "BOOL"), DataType::Boolean => write!(f, "BOOLEAN"), DataType::Date => write!(f, "DATE"), + DataType::Date32 => write!(f, "Date32"), DataType::Time(precision, timezone_info) => { format_datetime_precision_and_tz(f, "TIME", precision, timezone_info) } @@ -338,6 +460,14 @@ impl fmt::Display for DataType { DataType::Timestamp(precision, timezone_info) => { format_datetime_precision_and_tz(f, "TIMESTAMP", precision, 
timezone_info) } + DataType::Datetime64(precision, timezone) => { + format_clickhouse_datetime_precision_and_timezone( + f, + "DateTime64", + precision, + timezone, + ) + } DataType::Interval => write!(f, "INTERVAL"), DataType::JSON => write!(f, "JSON"), DataType::JSONB => write!(f, "JSONB"), @@ -350,6 +480,7 @@ impl fmt::Display for DataType { ArrayElemTypeDef::SquareBracket(t, None) => write!(f, "{t}[]"), ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"), ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"), + ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"), }, DataType::Custom(ty, modifiers) => { if modifiers.is_empty() { @@ -385,6 +516,25 @@ impl fmt::Display for DataType { write!(f, "STRUCT") } } + // ClickHouse + DataType::Nullable(data_type) => { + write!(f, "Nullable({})", data_type) + } + DataType::FixedString(character_length) => { + write!(f, "FixedString({})", character_length) + } + DataType::LowCardinality(data_type) => { + write!(f, "LowCardinality({})", data_type) + } + DataType::Map(key_data_type, value_data_type) => { + write!(f, "Map({}, {})", key_data_type, value_data_type) + } + DataType::Tuple(fields) => { + write!(f, "Tuple({})", display_comma_separated(fields)) + } + DataType::Nested(fields) => { + write!(f, "Nested({})", display_comma_separated(fields)) + } DataType::Unspecified => Ok(()), } } @@ -439,6 +589,23 @@ fn format_datetime_precision_and_tz( Ok(()) } +fn format_clickhouse_datetime_precision_and_timezone( + f: &mut fmt::Formatter, + sql_type: &'static str, + len: &u64, + time_zone: &Option, +) -> fmt::Result { + write!(f, "{sql_type}({len}")?; + + if let Some(time_zone) = time_zone { + write!(f, ", '{time_zone}'")?; + } + + write!(f, ")")?; + + Ok(()) +} + /// Timestamp and Time data types information about TimeZone formatting. /// /// This is more related to a display information than real differences between each variant. 
To @@ -593,4 +760,6 @@ pub enum ArrayElemTypeDef { AngleBracket(Box), /// `INT[]` or `INT[2]` SquareBracket(Box, Option), + /// `Array(Int64)` + Parenthesis(Box), } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ee39294a3..6dada1013 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -273,7 +273,7 @@ impl fmt::Display for Interval { } } -/// A field definition within a struct. +/// A field definition within a struct /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index 06086297c..292b994aa 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -201,7 +201,9 @@ define_keywords!( DATA, DATABASE, DATE, + DATE32, DATETIME, + DATETIME64, DAY, DAYOFWEEK, DAYOFYEAR, @@ -291,7 +293,9 @@ define_keywords!( FILTER, FIRST, FIRST_VALUE, + FIXEDSTRING, FLOAT, + FLOAT32, FLOAT4, FLOAT64, FLOAT8, @@ -361,7 +365,11 @@ define_keywords!( INSERT, INSTALL, INT, + INT128, + INT16, INT2, + INT256, + INT32, INT4, INT64, INT8, @@ -410,6 +418,7 @@ define_keywords!( LOCKED, LOGIN, LOGS, + LOWCARDINALITY, LOWER, LOW_PRIORITY, MACRO, @@ -453,6 +462,7 @@ define_keywords!( NATURAL, NCHAR, NCLOB, + NESTED, NEW, NEXT, NO, @@ -473,6 +483,7 @@ define_keywords!( NTH_VALUE, NTILE, NULL, + NULLABLE, NULLIF, NULLS, NUMERIC, @@ -711,8 +722,15 @@ define_keywords!( TRUE, TRUNCATE, TRY_CAST, + TUPLE, TYPE, UESCAPE, + UINT128, + UINT16, + UINT256, + UINT32, + UINT64, + UINT8, UNBOUNDED, UNCACHE, UNCOMMITTED, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fef307106..14b8d78ab 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2099,7 +2099,7 @@ impl<'a> Parser<'a> { /// ``` fn parse_bigquery_struct_literal(&mut self) -> Result { let (fields, trailing_bracket) = - self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + self.parse_struct_type_def(Self::parse_struct_field_def)?; if trailing_bracket.0 { return 
parser_err!("unmatched > in STRUCT literal", self.peek_token().location); } @@ -2194,13 +2194,16 @@ impl<'a> Parser<'a> { )) } - /// Parse a field definition in a BigQuery struct. + /// Parse a field definition in a struct [1] or tuple [2]. /// Syntax: /// /// ```sql /// [field_name] field_type /// ``` - fn parse_big_query_struct_field_def( + /// + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type + /// [2]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + fn parse_struct_field_def( &mut self, ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { // Look beyond the next item to infer whether both field name @@ -2266,6 +2269,47 @@ impl<'a> Parser<'a> { }) } + /// Parse clickhouse map [1] + /// Syntax + /// ```sql + /// Map(key_data_type, value_data_type) + /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/map + fn parse_click_house_map(&mut self) -> Result<(DataType, DataType), ParserError> { + self.expect_keyword(Keyword::MAP)?; + self.expect_token(&Token::LParen)?; + let key_data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let value_data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + Ok((key_data_type, value_data_type)) + } + + /// Parse clickhouse tuple [1] + /// Syntax + /// ```sql + /// Tuple([field_name] field_type, ...) 
+ /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + fn parse_click_house_tuple(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::TUPLE)?; + self.expect_token(&Token::LParen)?; + let mut field_defs = vec![]; + loop { + let (def, _) = self.parse_struct_field_def()?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + + Ok(field_defs) + } + /// For nested types that use the angle bracket syntax, this matches either /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously /// matched `trailing_bracket` argument). It returns whether there is a trailing @@ -6737,6 +6781,7 @@ impl<'a> Parser<'a> { Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT32 => Ok(DataType::Float32), Keyword::FLOAT64 => Ok(DataType::Float64), Keyword::FLOAT8 => Ok(DataType::Float8), Keyword::DOUBLE => { @@ -6794,7 +6839,23 @@ impl<'a> Parser<'a> { Ok(DataType::Int4(optional_precision?)) } } + Keyword::INT8 => { + if dialect_of!(self is ClickHouseDialect) { + Ok(DataType::Int8(None)) + } else { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt8(optional_precision?)) + } else { + Ok(DataType::Int8(optional_precision?)) + } + } + } + Keyword::INT16 => Ok(DataType::Int16), + Keyword::INT32 => Ok(DataType::Int32), Keyword::INT64 => Ok(DataType::Int64), + Keyword::INT128 => Ok(DataType::Int128), + Keyword::INT256 => Ok(DataType::Int256), Keyword::INTEGER => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -6811,14 +6872,12 @@ impl<'a> Parser<'a> { Ok(DataType::BigInt(optional_precision?)) } } - Keyword::INT8 => { - let optional_precision = self.parse_optional_precision(); - if 
self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt8(optional_precision?)) - } else { - Ok(DataType::Int8(optional_precision?)) - } - } + Keyword::UINT8 => Ok(DataType::UInt8), + Keyword::UINT16 => Ok(DataType::UInt16), + Keyword::UINT32 => Ok(DataType::UInt32), + Keyword::UINT64 => Ok(DataType::UInt64), + Keyword::UINT128 => Ok(DataType::UInt128), + Keyword::UINT256 => Ok(DataType::UInt256), Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), Keyword::NVARCHAR => { Ok(DataType::Nvarchar(self.parse_optional_character_length()?)) @@ -6854,7 +6913,13 @@ impl<'a> Parser<'a> { Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), + Keyword::DATE32 => Ok(DataType::Date32), Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), + Keyword::DATETIME64 => { + self.prev_token(); + let (precision, time_zone) = self.parse_datetime_64()?; + Ok(DataType::Datetime64(precision, time_zone)) + } Keyword::TIMESTAMP => { let precision = self.parse_optional_precision()?; let tz = if self.parse_keyword(Keyword::WITH) { @@ -6897,6 +6962,12 @@ impl<'a> Parser<'a> { Keyword::JSONB => Ok(DataType::JSONB), Keyword::REGCLASS => Ok(DataType::Regclass), Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), + Keyword::FIXEDSTRING => { + self.expect_token(&Token::LParen)?; + let character_length = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Ok(DataType::FixedString(character_length)) + } Keyword::TEXT => Ok(DataType::Text), Keyword::BYTEA => Ok(DataType::Bytea), Keyword::NUMERIC => Ok(DataType::Numeric( @@ -6919,6 +6990,10 @@ impl<'a> Parser<'a> { Keyword::ARRAY => { if dialect_of!(self is SnowflakeDialect) { Ok(DataType::Array(ArrayElemTypeDef::None)) + } else if dialect_of!(self is ClickHouseDialect) { + Ok(self.parse_sub_type(|internal_type| { + 
DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type)) + })?) } else { self.expect_token(&Token::Lt)?; let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; @@ -6931,10 +7006,35 @@ impl<'a> Parser<'a> { Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { self.prev_token(); let (field_defs, _trailing_bracket) = - self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + self.parse_struct_type_def(Self::parse_struct_field_def)?; trailing_bracket = _trailing_bracket; Ok(DataType::Struct(field_defs)) } + Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::Nullable)?) + } + Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::LowCardinality)?) + } + Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let (key_data_type, value_data_type) = self.parse_click_house_map()?; + Ok(DataType::Map( + Box::new(key_data_type), + Box::new(value_data_type), + )) + } + Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.expect_token(&Token::LParen)?; + let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(DataType::Nested(field_defs)) + } + Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let field_defs = self.parse_click_house_tuple()?; + Ok(DataType::Tuple(field_defs)) + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; @@ -7333,6 +7433,26 @@ impl<'a> Parser<'a> { } } + /// Parse datetime64 [1] + /// Syntax + /// ```sql + /// DateTime64(precision[, timezone]) + /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + pub fn parse_datetime_64(&mut self) -> Result<(u64, Option), ParserError> { + 
self.expect_keyword(Keyword::DATETIME64)?; + self.expect_token(&Token::LParen)?; + let precision = self.parse_literal_uint()?; + let time_zone = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_string()?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok((precision, time_zone)) + } + pub fn parse_optional_character_length( &mut self, ) -> Result, ParserError> { @@ -7425,6 +7545,17 @@ impl<'a> Parser<'a> { } } + /// Parse a parenthesized sub data type + fn parse_sub_type(&mut self, parent_type: F) -> Result + where + F: FnOnce(Box) -> DataType, + { + self.expect_token(&Token::LParen)?; + let inside_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(parent_type(inside_type.into())) + } + pub fn parse_delete(&mut self) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index a693936bc..20c3d0569 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -220,6 +220,196 @@ fn parse_create_table() { ); } +fn column_def(name: Ident, data_type: DataType) -> ColumnDef { + ColumnDef { + name, + data_type, + collation: None, + options: vec![], + } +} + +#[test] +fn parse_clickhouse_data_types() { + let sql = concat!( + "CREATE TABLE table (", + "a1 UInt8, a2 UInt16, a3 UInt32, a4 UInt64, a5 UInt128, a6 UInt256,", + " b1 Int8, b2 Int16, b3 Int32, b4 Int64, b5 Int128, b6 Int256,", + " c1 Float32, c2 Float64,", + " d1 Date32, d2 DateTime64(3), d3 DateTime64(3, 'UTC'),", + " e1 FixedString(255),", + " f1 LowCardinality(Int32)", + ") ORDER BY (a1)", + ); + // ClickHouse has a case-sensitive definition of data type, but canonical representation is not + let canonical_sql = sql + .replace(" Int8", " INT8") + .replace(" Int64", " INT64") + .replace(" Float64", " FLOAT64"); + + match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) 
{ + Statement::CreateTable { name, columns, .. } => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + column_def("a1".into(), DataType::UInt8), + column_def("a2".into(), DataType::UInt16), + column_def("a3".into(), DataType::UInt32), + column_def("a4".into(), DataType::UInt64), + column_def("a5".into(), DataType::UInt128), + column_def("a6".into(), DataType::UInt256), + column_def("b1".into(), DataType::Int8(None)), + column_def("b2".into(), DataType::Int16), + column_def("b3".into(), DataType::Int32), + column_def("b4".into(), DataType::Int64), + column_def("b5".into(), DataType::Int128), + column_def("b6".into(), DataType::Int256), + column_def("c1".into(), DataType::Float32), + column_def("c2".into(), DataType::Float64), + column_def("d1".into(), DataType::Date32), + column_def("d2".into(), DataType::Datetime64(3, None)), + column_def("d3".into(), DataType::Datetime64(3, Some("UTC".into()))), + column_def("e1".into(), DataType::FixedString(255)), + column_def( + "f1".into(), + DataType::LowCardinality(Box::new(DataType::Int32)) + ), + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_nullable() { + let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE=MergeTree ORDER BY (`k`)"#; + // ClickHouse has a case-sensitive definition of data type, but canonical representation is not + let canonical_sql = sql.replace("String", "STRING"); + + match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + column_def("k".into(), DataType::UInt8), + column_def( + Ident::with_quote('`', "a"), + DataType::Nullable(Box::new(DataType::String(None))) + ), + column_def( + Ident::with_quote('`', "b"), + DataType::Nullable(Box::new(DataType::Datetime64( + 9, + Some("UTC".to_string()) + ))) + ), + column_def( + "c".into(), + DataType::Nullable(Box::new(DataType::Datetime64(9, None))) + ), + ColumnDef { + name: "d".into(), + data_type: DataType::Date32, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Null + }], + } + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_nested_data_types() { + let sql = concat!( + "CREATE TABLE table (", + " i Nested(a Array(Int16), b LowCardinality(String)),", + " k Array(Tuple(FixedString(128), Int128)),", + " l Tuple(a DateTime64(9), b Array(UUID)),", + " m Map(String, UInt16)", + ") ENGINE=MergeTree ORDER BY (k)" + ); + + match clickhouse().one_statement_parses_to(sql, "") { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("i"), + data_type: DataType::Nested(vec![ + column_def( + "a".into(), + DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new( + DataType::Int16 + ),)) + ), + column_def( + "b".into(), + DataType::LowCardinality(Box::new(DataType::String(None))) + ) + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("k"), + data_type: DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new( + DataType::Tuple(vec![ + StructField { + field_name: None, + field_type: DataType::FixedString(128) + }, + StructField { + field_name: None, + field_type: DataType::Int128 + } + ]) + ))), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("l"), + data_type: DataType::Tuple(vec![ + StructField { + field_name: Some("a".into()), + field_type: DataType::Datetime64(9, None), + }, + StructField { + field_name: Some("b".into()), + field_type: DataType::Array(ArrayElemTypeDef::Parenthesis( + Box::new(DataType::Uuid) + )) + }, + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("m"), + data_type: DataType::Map( + Box::new(DataType::String(None)), + Box::new(DataType::UInt16) + ), + collation: None, + options: vec![], + }, + ] + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_view_with_fields_data_types() { match clickhouse().verified_stmt(r#"CREATE VIEW v (i "int", f "String") AS SELECT * FROM t"#) { From 3d7f369c3b3e8fc379a2853e92d82a15e1b9688a Mon Sep 17 00:00:00 2001 From: "aleksei.p" Date: Tue, 4 Jun 2024 16:38:59 +0200 Subject: [PATCH 2/2] update with renaming and simplification --- src/parser/mod.rs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 14b8d78ab..5884cec8f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2276,7 +2276,7 @@ impl<'a> Parser<'a> { /// ``` /// /// [1]: 
https://clickhouse.com/docs/en/sql-reference/data-types/map - fn parse_click_house_map(&mut self) -> Result<(DataType, DataType), ParserError> { + fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> { self.expect_keyword(Keyword::MAP)?; self.expect_token(&Token::LParen)?; let key_data_type = self.parse_data_type()?; @@ -2294,7 +2294,7 @@ impl<'a> Parser<'a> { /// ``` /// /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple - fn parse_click_house_tuple(&mut self) -> Result, ParserError> { + fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { self.expect_keyword(Keyword::TUPLE)?; self.expect_token(&Token::LParen)?; let mut field_defs = vec![]; @@ -6840,15 +6840,11 @@ impl<'a> Parser<'a> { } } Keyword::INT8 => { - if dialect_of!(self is ClickHouseDialect) { - Ok(DataType::Int8(None)) + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt8(optional_precision?)) } else { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt8(optional_precision?)) - } else { - Ok(DataType::Int8(optional_precision?)) - } + Ok(DataType::Int8(optional_precision?)) } } Keyword::INT16 => Ok(DataType::Int16), @@ -7018,7 +7014,7 @@ impl<'a> Parser<'a> { } Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { self.prev_token(); - let (key_data_type, value_data_type) = self.parse_click_house_map()?; + let (key_data_type, value_data_type) = self.parse_click_house_map_def()?; Ok(DataType::Map( Box::new(key_data_type), Box::new(value_data_type), @@ -7032,7 +7028,7 @@ impl<'a> Parser<'a> { } Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { self.prev_token(); - let field_defs = self.parse_click_house_tuple()?; + let field_defs = self.parse_click_house_tuple_def()?; Ok(DataType::Tuple(field_defs)) } _ => {