Skip to content

Commit 03282a1

Browse files
author
aleksei.p
committed
ClickHouse data types
1 parent 375742d commit 03282a1

File tree

5 files changed

+532
-24
lines changed

5 files changed

+532
-24
lines changed

src/ast/data_type.rs

Lines changed: 180 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use sqlparser_derive::{Visit, VisitMut};
2222

2323
use crate::ast::{display_comma_separated, ObjectName, StructField};
2424

25-
use super::value::escape_single_quote_string;
25+
use super::{value::escape_single_quote_string, ColumnDef};
2626

2727
/// SQL data types
2828
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@@ -129,10 +129,39 @@ pub enum DataType {
129129
///
130130
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
131131
Int4(Option<u64>),
132-
/// Integer type in [bigquery]
132+
/// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse]
133+
/// Note: Int8 means 8 bytes in [postgresql] (not 8 bits)
134+
/// Int8 with optional display width e.g. INT8 or INT8(11)
135+
/// Note: Int8 means 8 bits in [clickhouse]
136+
///
137+
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
138+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
139+
Int8(Option<u64>),
140+
/// Integer type in [clickhouse]
141+
/// Note: Int16 means 16 bits in [clickhouse]
142+
///
143+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
144+
Int16,
145+
/// Integer type in [clickhouse]
146+
/// Note: Int32 means 32 bits in [clickhouse]
147+
///
148+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
149+
Int32,
150+
/// Integer type in [bigquery], [clickhouse]
133151
///
134152
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
153+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
135154
Int64,
155+
/// Integer type in [clickhouse]
156+
/// Note: Int128 means 128 bits in [clickhouse]
157+
///
158+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
159+
Int128,
160+
/// Integer type in [clickhouse]
161+
/// Note: Int256 means 256 bits in [clickhouse]
162+
///
163+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
164+
Int256,
136165
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
137166
Integer(Option<u64>),
138167
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
@@ -141,25 +170,54 @@ pub enum DataType {
141170
UnsignedInt4(Option<u64>),
142171
/// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED
143172
UnsignedInteger(Option<u64>),
173+
/// Unsigned integer type in [clickhouse]
174+
/// Note: UInt8 means 8 bits in [clickhouse]
175+
///
176+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
177+
UInt8,
178+
/// Unsigned integer type in [clickhouse]
179+
/// Note: UInt16 means 16 bits in [clickhouse]
180+
///
181+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
182+
UInt16,
183+
/// Unsigned integer type in [clickhouse]
184+
/// Note: UInt32 means 32 bits in [clickhouse]
185+
///
186+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
187+
UInt32,
188+
/// Unsigned integer type in [clickhouse]
189+
/// Note: UInt64 means 64 bits in [clickhouse]
190+
///
191+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
192+
UInt64,
193+
/// Unsigned integer type in [clickhouse]
194+
/// Note: UInt128 means 128 bits in [clickhouse]
195+
///
196+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
197+
UInt128,
198+
/// Unsigned integer type in [clickhouse]
199+
/// Note: UInt256 means 256 bits in [clickhouse]
200+
///
201+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
202+
UInt256,
144203
/// Big integer with optional display width e.g. BIGINT or BIGINT(20)
145204
BigInt(Option<u64>),
146205
/// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED
147206
UnsignedBigInt(Option<u64>),
148-
/// Int8 as alias for Bigint in [postgresql]
149-
/// Note: Int8 mean 8 bytes in postgres (not 8 bits)
150-
/// Int8 with optional display width e.g. INT8 or INT8(11)
151-
///
152-
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
153-
Int8(Option<u64>),
154207
/// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED
155208
UnsignedInt8(Option<u64>),
156209
/// Float4 as alias for Real in [postgresql]
157210
///
158211
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
159212
Float4,
213+
/// Floating point in [clickhouse]
214+
///
215+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
216+
Float32,
160217
/// Floating point in [bigquery], [clickhouse]
161218
///
162219
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
220+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
163221
Float64,
164222
/// Floating point e.g. REAL
165223
Real,
@@ -182,6 +240,10 @@ pub enum DataType {
182240
Boolean,
183241
/// Date
184242
Date,
243+
/// Date32 with the same range as Datetime64 e.g. [ClickHouse][1].
244+
///
245+
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32
246+
Date32,
185247
/// Time with optional time precision and time zone information e.g. [standard][1].
186248
///
187249
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
@@ -190,6 +252,10 @@ pub enum DataType {
190252
///
191253
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html
192254
Datetime(Option<u64>),
255+
/// Datetime with time precision and optional timezone e.g. [ClickHouse][1].
256+
///
257+
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64
258+
Datetime64(u64, Option<String>),
193259
/// Timestamp with optional time precision and time zone information e.g. [standard][1].
194260
///
195261
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
@@ -206,12 +272,28 @@ pub enum DataType {
206272
Text,
207273
/// String with optional length.
208274
String(Option<u64>),
275+
/// A fixed-length string e.g. [ClickHouse][1].
276+
///
277+
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring
278+
FixedString(u64),
209279
/// Bytea
210280
Bytea,
211281
/// Custom type such as enums
212282
Custom(ObjectName, Vec<String>),
213283
/// Arrays
214284
Array(ArrayElemTypeDef),
285+
/// Map of key type to value type e.g. `Map(String, UInt64)` in [clickhouse]
286+
///
287+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map
288+
Map(Box<DataType>, Box<DataType>),
289+
/// Tuple of fields in [clickhouse]
290+
///
291+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
292+
Tuple(Vec<StructField>),
293+
/// Nested data structure made of column definitions in [clickhouse]
294+
///
295+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
296+
Nested(Vec<ColumnDef>),
215297
/// Enums
216298
Enum(Vec<String>),
217299
/// Set
@@ -221,6 +303,14 @@ pub enum DataType {
221303
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
222304
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
223305
Struct(Vec<StructField>),
306+
/// Nullable - lets the wrapped data type also store the special NULL marker in [clickhouse].
307+
///
308+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable
309+
Nullable(Box<DataType>),
310+
/// LowCardinality - changes the internal representation of other data types to be dictionary-encoded in [clickhouse].
311+
///
312+
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality
313+
LowCardinality(Box<DataType>),
224314
/// No type specified - only used with
225315
/// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such
226316
/// as `CREATE TABLE t1 (a)`.
@@ -296,9 +386,24 @@ impl fmt::Display for DataType {
296386
DataType::Int4(zerofill) => {
297387
format_type_with_optional_length(f, "INT4", zerofill, false)
298388
}
389+
DataType::Int8(zerofill) => {
390+
format_type_with_optional_length(f, "INT8", zerofill, false)
391+
}
392+
DataType::Int16 => {
393+
write!(f, "Int16")
394+
}
395+
DataType::Int32 => {
396+
write!(f, "Int32")
397+
}
299398
DataType::Int64 => {
300399
write!(f, "INT64")
301400
}
401+
DataType::Int128 => {
402+
write!(f, "Int128")
403+
}
404+
DataType::Int256 => {
405+
write!(f, "Int256")
406+
}
302407
DataType::UnsignedInt4(zerofill) => {
303408
format_type_with_optional_length(f, "INT4", zerofill, true)
304409
}
@@ -314,21 +419,38 @@ impl fmt::Display for DataType {
314419
DataType::UnsignedBigInt(zerofill) => {
315420
format_type_with_optional_length(f, "BIGINT", zerofill, true)
316421
}
317-
DataType::Int8(zerofill) => {
318-
format_type_with_optional_length(f, "INT8", zerofill, false)
319-
}
320422
DataType::UnsignedInt8(zerofill) => {
321423
format_type_with_optional_length(f, "INT8", zerofill, true)
322424
}
425+
DataType::UInt8 => {
426+
write!(f, "UInt8")
427+
}
428+
DataType::UInt16 => {
429+
write!(f, "UInt16")
430+
}
431+
DataType::UInt32 => {
432+
write!(f, "UInt32")
433+
}
434+
DataType::UInt64 => {
435+
write!(f, "UInt64")
436+
}
437+
DataType::UInt128 => {
438+
write!(f, "UInt128")
439+
}
440+
DataType::UInt256 => {
441+
write!(f, "UInt256")
442+
}
323443
DataType::Real => write!(f, "REAL"),
324444
DataType::Float4 => write!(f, "FLOAT4"),
445+
DataType::Float32 => write!(f, "Float32"),
325446
DataType::Float64 => write!(f, "FLOAT64"),
326447
DataType::Double => write!(f, "DOUBLE"),
327448
DataType::Float8 => write!(f, "FLOAT8"),
328449
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
329450
DataType::Bool => write!(f, "BOOL"),
330451
DataType::Boolean => write!(f, "BOOLEAN"),
331452
DataType::Date => write!(f, "DATE"),
453+
DataType::Date32 => write!(f, "Date32"),
332454
DataType::Time(precision, timezone_info) => {
333455
format_datetime_precision_and_tz(f, "TIME", precision, timezone_info)
334456
}
@@ -338,6 +460,14 @@ impl fmt::Display for DataType {
338460
DataType::Timestamp(precision, timezone_info) => {
339461
format_datetime_precision_and_tz(f, "TIMESTAMP", precision, timezone_info)
340462
}
463+
DataType::Datetime64(precision, timezone) => {
464+
format_clickhouse_datetime_precision_and_timezone(
465+
f,
466+
"DateTime64",
467+
precision,
468+
timezone,
469+
)
470+
}
341471
DataType::Interval => write!(f, "INTERVAL"),
342472
DataType::JSON => write!(f, "JSON"),
343473
DataType::JSONB => write!(f, "JSONB"),
@@ -350,6 +480,7 @@ impl fmt::Display for DataType {
350480
ArrayElemTypeDef::SquareBracket(t, None) => write!(f, "{t}[]"),
351481
ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"),
352482
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
483+
ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"),
353484
},
354485
DataType::Custom(ty, modifiers) => {
355486
if modifiers.is_empty() {
@@ -385,6 +516,25 @@ impl fmt::Display for DataType {
385516
write!(f, "STRUCT")
386517
}
387518
}
519+
// ClickHouse
520+
DataType::Nullable(data_type) => {
521+
write!(f, "Nullable({})", data_type)
522+
}
523+
DataType::FixedString(character_length) => {
524+
write!(f, "FixedString({})", character_length)
525+
}
526+
DataType::LowCardinality(data_type) => {
527+
write!(f, "LowCardinality({})", data_type)
528+
}
529+
DataType::Map(key_data_type, value_data_type) => {
530+
write!(f, "Map({}, {})", key_data_type, value_data_type)
531+
}
532+
DataType::Tuple(fields) => {
533+
write!(f, "Tuple({})", display_comma_separated(fields))
534+
}
535+
DataType::Nested(fields) => {
536+
write!(f, "Nested({})", display_comma_separated(fields))
537+
}
388538
DataType::Unspecified => Ok(()),
389539
}
390540
}
@@ -439,6 +589,23 @@ fn format_datetime_precision_and_tz(
439589
Ok(())
440590
}
441591

592+
fn format_clickhouse_datetime_precision_and_timezone(
593+
f: &mut fmt::Formatter,
594+
sql_type: &'static str,
595+
len: &u64,
596+
time_zone: &Option<String>,
597+
) -> fmt::Result {
598+
write!(f, "{sql_type}({len}")?;
599+
600+
if let Some(time_zone) = time_zone {
601+
write!(f, ", '{time_zone}'")?;
602+
}
603+
604+
write!(f, ")")?;
605+
606+
Ok(())
607+
}
608+
442609
/// Timestamp and Time data types information about TimeZone formatting.
443610
///
444611
/// This is more related to a display information than real differences between each variant. To
@@ -593,4 +760,6 @@ pub enum ArrayElemTypeDef {
593760
AngleBracket(Box<DataType>),
594761
/// `INT[]` or `INT[2]`
595762
SquareBracket(Box<DataType>, Option<u64>),
763+
/// `Array(Int64)`
764+
Parenthesis(Box<DataType>),
596765
}

src/ast/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ impl fmt::Display for Interval {
273273
}
274274
}
275275

276-
/// A field definition within a struct.
276+
/// A field definition within a struct
277277
///
278278
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
279279
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

0 commit comments

Comments
 (0)