Skip to content

Commit 41a6b70

Browse files
Aleksei Piianin authored and lustefaniak committed
ClickHouse data types (apache#1285)
1 parent a4cd74e commit 41a6b70

File tree

7 files changed

+645
-91
lines changed

7 files changed

+645
-91
lines changed

src/ast/data_type.rs

Lines changed: 207 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use sqlparser_derive::{Visit, VisitMut};
2222

2323
use crate::ast::{display_comma_separated, ObjectName, StructField};
2424

25-
use super::value::escape_single_quote_string;
25+
use super::{value::escape_single_quote_string, ColumnDef};
2626

2727
/// SQL data types
2828
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@@ -129,10 +129,39 @@ pub enum DataType {
129129
///
130130
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
131131
Int4(Option<u64>),
132-
/// Integer type in [bigquery]
132+
/// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse]
133+
/// Note: Int8 means 8 bytes in [postgresql] (not 8 bits)
134+
/// Int8 with optional display width e.g. INT8 or INT8(11)
135+
/// Note: Int8 means 8 bits in [clickhouse]
136+
///
137+
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
138+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
139+
Int8(Option<u64>),
140+
/// Integer type in [clickhouse]
141+
/// Note: Int16 means 16 bits in [clickhouse]
142+
///
143+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
144+
Int16,
145+
/// Integer type in [clickhouse]
146+
/// Note: Int32 means 32 bits in [clickhouse]
147+
///
148+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
149+
Int32,
150+
/// Integer type in [bigquery], [clickhouse]
133151
///
134152
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
153+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
135154
Int64,
155+
/// Integer type in [clickhouse]
156+
/// Note: Int128 means 128 bits in [clickhouse]
157+
///
158+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
159+
Int128,
160+
/// Integer type in [clickhouse]
161+
/// Note: Int256 means 256 bits in [clickhouse]
162+
///
163+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
164+
Int256,
136165
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
137166
Integer(Option<u64>),
138167
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
@@ -141,25 +170,54 @@ pub enum DataType {
141170
UnsignedInt4(Option<u64>),
142171
/// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED
143172
UnsignedInteger(Option<u64>),
173+
/// Unsigned integer type in [clickhouse]
174+
/// Note: UInt8 means 8 bits in [clickhouse]
175+
///
176+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
177+
UInt8,
178+
/// Unsigned integer type in [clickhouse]
179+
/// Note: UInt16 means 16 bits in [clickhouse]
180+
///
181+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
182+
UInt16,
183+
/// Unsigned integer type in [clickhouse]
184+
/// Note: UInt32 means 32 bits in [clickhouse]
185+
///
186+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
187+
UInt32,
188+
/// Unsigned integer type in [clickhouse]
189+
/// Note: UInt64 means 64 bits in [clickhouse]
190+
///
191+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
192+
UInt64,
193+
/// Unsigned integer type in [clickhouse]
194+
/// Note: UInt128 means 128 bits in [clickhouse]
195+
///
196+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
197+
UInt128,
198+
/// Unsigned integer type in [clickhouse]
199+
/// Note: UInt256 means 256 bits in [clickhouse]
200+
///
201+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
202+
UInt256,
144203
/// Big integer with optional display width e.g. BIGINT or BIGINT(20)
145204
BigInt(Option<u64>),
146205
/// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED
147206
UnsignedBigInt(Option<u64>),
148-
/// Int8 as alias for Bigint in [postgresql]
149-
/// Note: Int8 mean 8 bytes in postgres (not 8 bits)
150-
/// Int8 with optional display width e.g. INT8 or INT8(11)
151-
///
152-
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
153-
Int8(Option<u64>),
154207
/// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED
155208
UnsignedInt8(Option<u64>),
156209
/// Float4 as alias for Real in [postgresql]
157210
///
158211
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
159212
Float4,
213+
/// Floating point in [clickhouse]
214+
///
215+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
216+
Float32,
160217
/// Floating point in [bigquery], [clickhouse]
161218
///
162219
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
220+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
163221
Float64,
164222
/// Floating point e.g. REAL
165223
Real,
@@ -182,14 +240,23 @@ pub enum DataType {
182240
Boolean,
183241
/// Date
184242
Date,
243+
/// Date32 with the same range as Datetime64 e.g. [ClickHouse][1].
244+
///
245+
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32
246+
Date32,
185247
/// Time with optional time precision and time zone information e.g. [standard][1].
186248
///
187249
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
188250
Time(Option<u64>, TimezoneInfo),
189-
/// Datetime with optional time precision e.g. [MySQL][1].
251+
/// Datetime with optional time precision e.g. [MySQL][1], or with optional timezone e.g. [ClickHouse][2].
190252
///
191253
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html
192-
Datetime(Option<u64>),
254+
/// [2]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime
255+
Datetime(Option<u64>, Option<String>),
256+
/// Datetime with time precision and optional timezone e.g. [ClickHouse][1].
257+
///
258+
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64
259+
Datetime64(u64, Option<String>),
193260
/// Timestamp with optional time precision and time zone information e.g. [standard][1].
194261
///
195262
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
@@ -204,12 +271,28 @@ pub enum DataType {
204271
Text,
205272
/// String with optional length.
206273
String(Option<u64>),
274+
/// A fixed-length string e.g. [ClickHouse][1].
275+
///
276+
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring
277+
FixedString(u64),
207278
/// Bytea
208279
Bytea,
209280
/// Custom type such as enums
210281
Custom(ObjectName, Vec<String>),
211282
/// Arrays
212283
Array(ArrayElemTypeDef),
284+
/// Map type in [clickhouse]
285+
///
286+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map
287+
Map(Box<DataType>, Box<DataType>),
288+
/// Tuple type in [clickhouse]
289+
///
290+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
291+
Tuple(Vec<StructField>),
292+
/// Nested data structure in [clickhouse]
293+
///
294+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
295+
Nested(Vec<ColumnDef>),
213296
/// Enums
214297
Enum(Vec<String>),
215298
/// Set
@@ -221,6 +304,18 @@ pub enum DataType {
221304
Struct(Vec<StructField>),
222305
/// MAP<>
223306
DatabricksMap(Vec<StructField>),
307+
/// Nullable - wraps another data type so that the special marker NULL can be stored alongside its values in ClickHouse.
308+
///
309+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable
310+
Nullable(Box<DataType>),
311+
/// LowCardinality - changes the internal representation of other data types to be dictionary-encoded.
312+
///
313+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality
314+
LowCardinality(Box<DataType>),
315+
/// No type specified - only used with
316+
/// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such
317+
/// as `CREATE TABLE t1 (a)`.
318+
Unspecified,
224319
}
225320

226321
impl fmt::Display for DataType {
@@ -294,9 +389,24 @@ impl fmt::Display for DataType {
294389
DataType::Int4(zerofill) => {
295390
format_type_with_optional_length(f, "INT4", zerofill, false)
296391
}
392+
DataType::Int8(zerofill) => {
393+
format_type_with_optional_length(f, "INT8", zerofill, false)
394+
}
395+
DataType::Int16 => {
396+
write!(f, "Int16")
397+
}
398+
DataType::Int32 => {
399+
write!(f, "Int32")
400+
}
297401
DataType::Int64 => {
298402
write!(f, "INT64")
299403
}
404+
DataType::Int128 => {
405+
write!(f, "Int128")
406+
}
407+
DataType::Int256 => {
408+
write!(f, "Int256")
409+
}
300410
DataType::UnsignedInt4(zerofill) => {
301411
format_type_with_optional_length(f, "INT4", zerofill, true)
302412
}
@@ -312,30 +422,59 @@ impl fmt::Display for DataType {
312422
DataType::UnsignedBigInt(zerofill) => {
313423
format_type_with_optional_length(f, "BIGINT", zerofill, true)
314424
}
315-
DataType::Int8(zerofill) => {
316-
format_type_with_optional_length(f, "INT8", zerofill, false)
317-
}
318425
DataType::UnsignedInt8(zerofill) => {
319426
format_type_with_optional_length(f, "INT8", zerofill, true)
320427
}
428+
DataType::UInt8 => {
429+
write!(f, "UInt8")
430+
}
431+
DataType::UInt16 => {
432+
write!(f, "UInt16")
433+
}
434+
DataType::UInt32 => {
435+
write!(f, "UInt32")
436+
}
437+
DataType::UInt64 => {
438+
write!(f, "UInt64")
439+
}
440+
DataType::UInt128 => {
441+
write!(f, "UInt128")
442+
}
443+
DataType::UInt256 => {
444+
write!(f, "UInt256")
445+
}
321446
DataType::Real => write!(f, "REAL"),
322447
DataType::Float4 => write!(f, "FLOAT4"),
448+
DataType::Float32 => write!(f, "Float32"),
323449
DataType::Float64 => write!(f, "FLOAT64"),
324450
DataType::Double => write!(f, "DOUBLE"),
325451
DataType::Float8 => write!(f, "FLOAT8"),
326452
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
327453
DataType::Bool => write!(f, "BOOL"),
328454
DataType::Boolean => write!(f, "BOOLEAN"),
329455
DataType::Date => write!(f, "DATE"),
456+
DataType::Date32 => write!(f, "Date32"),
330457
DataType::Time(precision, timezone_info) => {
331458
format_datetime_precision_and_tz(f, "TIME", precision, timezone_info)
332459
}
333-
DataType::Datetime(precision) => {
334-
format_type_with_optional_length(f, "DATETIME", precision, false)
460+
DataType::Datetime(precision, timezone_info) => {
461+
if let Some(timezone_info) = timezone_info {
462+
format_clickhouse_datetime_timezone(f, "DATETIME", timezone_info)
463+
} else {
464+
format_type_with_optional_length(f, "DATETIME", precision, false)
465+
}
335466
}
336467
DataType::Timestamp(precision, timezone_info) => {
337468
format_datetime_precision_and_tz(f, "TIMESTAMP", precision, timezone_info)
338469
}
470+
DataType::Datetime64(precision, timezone) => {
471+
format_clickhouse_datetime_precision_and_timezone(
472+
f,
473+
"DateTime64",
474+
precision,
475+
timezone,
476+
)
477+
}
339478
DataType::Interval => write!(f, "INTERVAL"),
340479
DataType::JSON => write!(f, "JSON"),
341480
DataType::Regclass => write!(f, "REGCLASS"),
@@ -344,8 +483,11 @@ impl fmt::Display for DataType {
344483
DataType::Bytea => write!(f, "BYTEA"),
345484
DataType::Array(ty) => match ty {
346485
ArrayElemTypeDef::None => write!(f, "ARRAY"),
347-
ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"),
486+
ArrayElemTypeDef::SquareBracket(t) => {
487+
write!(f, "{t}[]")
488+
}
348489
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
490+
ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"),
349491
},
350492
DataType::Custom(ty, modifiers) => {
351493
if modifiers.is_empty() {
@@ -388,6 +530,26 @@ impl fmt::Display for DataType {
388530
write!(f, "MAP")
389531
}
390532
}
533+
// ClickHouse
534+
DataType::Nullable(data_type) => {
535+
write!(f, "Nullable({})", data_type)
536+
}
537+
DataType::FixedString(character_length) => {
538+
write!(f, "FixedString({})", character_length)
539+
}
540+
DataType::LowCardinality(data_type) => {
541+
write!(f, "LowCardinality({})", data_type)
542+
}
543+
DataType::Map(key_data_type, value_data_type) => {
544+
write!(f, "Map({}, {})", key_data_type, value_data_type)
545+
}
546+
DataType::Tuple(fields) => {
547+
write!(f, "Tuple({})", display_comma_separated(fields))
548+
}
549+
DataType::Nested(fields) => {
550+
write!(f, "Nested({})", display_comma_separated(fields))
551+
}
552+
DataType::Unspecified => Ok(()),
391553
}
392554
}
393555
}
@@ -441,6 +603,32 @@ fn format_datetime_precision_and_tz(
441603
Ok(())
442604
}
443605

606+
/// Writes a ClickHouse-style date-time type that always carries a precision
/// and may carry a timezone, e.g. `DateTime64(3)` or `DateTime64(3, 'UTC')`.
///
/// * `f` - formatter receiving the output.
/// * `sql_type` - type keyword written before the opening parenthesis.
/// * `len` - precision written as the first argument.
/// * `time_zone` - when `Some`, written as a second, single-quoted argument.
///   The timezone text is emitted verbatim; no quote escaping is applied.
///
/// Returns the result of the underlying formatter write.
fn format_clickhouse_datetime_precision_and_timezone(
    f: &mut fmt::Formatter,
    sql_type: &'static str,
    len: &u64,
    time_zone: &Option<String>,
) -> fmt::Result {
    // One write per shape, returned directly, instead of three writes
    // followed by a trailing `Ok(())`.
    match time_zone {
        Some(zone) => write!(f, "{sql_type}({len}, '{zone}')"),
        None => write!(f, "{sql_type}({len})"),
    }
}
622+
623+
/// Writes a ClickHouse-style date-time type that carries only a timezone,
/// e.g. `DATETIME('UTC')`.
///
/// * `f` - formatter receiving the output.
/// * `sql_type` - type keyword written before the opening parenthesis.
/// * `time_zone` - timezone text, emitted verbatim between single quotes
///   (no quote escaping is applied).
///
/// `time_zone` is a `&str` rather than a `&String` so any string slice can be
/// passed; callers holding a `&String` keep working through deref coercion.
///
/// Returns the result of the underlying formatter write.
fn format_clickhouse_datetime_timezone(
    f: &mut fmt::Formatter,
    sql_type: &'static str,
    time_zone: &str,
) -> fmt::Result {
    write!(f, "{sql_type}('{time_zone}')")
}
631+
444632
/// Timestamp and Time data types information about TimeZone formatting.
445633
///
446634
/// This is more related to a display information than real differences between each variant. To
@@ -593,6 +781,8 @@ pub enum ArrayElemTypeDef {
593781
None,
594782
/// `ARRAY<INT>`
595783
AngleBracket(Box<DataType>),
596-
/// `[]INT`
784+
/// `INT[]` or `INT[2]`
597785
SquareBracket(Box<DataType>),
786+
/// `Array(Int64)`
787+
Parenthesis(Box<DataType>),
598788
}

0 commit comments

Comments
 (0)