Skip to content

Commit f9ab8dc

Browse files
authored
Support for DuckDB Union datatype (#1322)
1 parent f5ccef6 commit f9ab8dc

File tree

4 files changed

+151
-1
lines changed

4 files changed

+151
-1
lines changed

src/ast/data_type.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
2020
#[cfg(feature = "visitor")]
2121
use sqlparser_derive::{Visit, VisitMut};
2222

23-
use crate::ast::{display_comma_separated, ObjectName, StructField};
23+
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
2424

2525
use super::{value::escape_single_quote_string, ColumnDef};
2626

@@ -303,6 +303,10 @@ pub enum DataType {
303303
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
304304
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
305305
Struct(Vec<StructField>),
306+
/// Union type, declared as a list of named, typed fields; see [duckdb].
307+
///
308+
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
309+
Union(Vec<UnionField>),
306310
/// Nullable - special marker NULL represents in ClickHouse as a data type.
307311
///
308312
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable
@@ -516,6 +520,9 @@ impl fmt::Display for DataType {
516520
write!(f, "STRUCT")
517521
}
518522
}
523+
DataType::Union(fields) => {
524+
write!(f, "UNION({})", display_comma_separated(fields))
525+
}
519526
// ClickHouse
520527
DataType::Nullable(data_type) => {
521528
write!(f, "Nullable({})", data_type)

src/ast/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,23 @@ impl fmt::Display for StructField {
294294
}
295295
}
296296

297+
/// A field definition within a union type: a field name paired with its data type; see [duckdb].
298+
///
299+
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
300+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
301+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
302+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
303+
pub struct UnionField {
304+
pub field_name: Ident,
305+
pub field_type: DataType,
306+
}
307+
308+
impl fmt::Display for UnionField {
309+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310+
write!(f, "{} {}", self.field_name, self.field_type)
311+
}
312+
}
313+
297314
/// A dictionary field within a dictionary.
298315
///
299316
/// [duckdb]: https://duckdb.org/docs/sql/data_types/struct#creating-structs

src/parser/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2246,6 +2246,32 @@ impl<'a> Parser<'a> {
22462246
))
22472247
}
22482248

2249+
/// DuckDB specific: Parse a Union type definition [1] as a comma-separated sequence of field name/type pairs.
2250+
///
2251+
/// Syntax:
2252+
///
2253+
/// ```sql
2254+
/// UNION(field_name field_type[,...])
2255+
/// ```
2256+
///
2257+
/// [1]: https://duckdb.org/docs/sql/data_types/union.html
2258+
fn parse_union_type_def(&mut self) -> Result<Vec<UnionField>, ParserError> {
2259+
self.expect_keyword(Keyword::UNION)?;
2260+
2261+
self.expect_token(&Token::LParen)?;
2262+
2263+
let fields = self.parse_comma_separated(|p| {
2264+
Ok(UnionField {
2265+
field_name: p.parse_identifier(false)?,
2266+
field_type: p.parse_data_type()?,
2267+
})
2268+
})?;
2269+
2270+
self.expect_token(&Token::RParen)?;
2271+
2272+
Ok(fields)
2273+
}
2274+
22492275
/// DuckDB specific: Parse a duckdb dictionary [1]
22502276
///
22512277
/// Syntax:
@@ -7136,6 +7162,11 @@ impl<'a> Parser<'a> {
71367162
trailing_bracket = _trailing_bracket;
71377163
Ok(DataType::Struct(field_defs))
71387164
}
7165+
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
7166+
self.prev_token();
7167+
let fields = self.parse_union_type_def()?;
7168+
Ok(DataType::Union(fields))
7169+
}
71397170
Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
71407171
Ok(self.parse_sub_type(DataType::Nullable)?)
71417172
}

tests/sqlparser_duckdb.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,3 +545,98 @@ fn test_array_index() {
545545
expr
546546
);
547547
}
548+
549+
#[test]
550+
fn test_duckdb_union_datatype() {
551+
let sql = "CREATE TABLE tbl1 (one UNION(a INT), two UNION(a INT, b INT), nested UNION(a UNION(b INT)))";
552+
let stmt = duckdb_and_generic().verified_stmt(sql);
553+
assert_eq!(
554+
Statement::CreateTable(CreateTable {
555+
or_replace: Default::default(),
556+
temporary: Default::default(),
557+
external: Default::default(),
558+
global: Default::default(),
559+
if_not_exists: Default::default(),
560+
transient: Default::default(),
561+
volatile: Default::default(),
562+
name: ObjectName(vec!["tbl1".into()]),
563+
columns: vec![
564+
ColumnDef {
565+
name: "one".into(),
566+
data_type: DataType::Union(vec![UnionField {
567+
field_name: "a".into(),
568+
field_type: DataType::Int(None)
569+
}]),
570+
collation: Default::default(),
571+
options: Default::default()
572+
},
573+
ColumnDef {
574+
name: "two".into(),
575+
data_type: DataType::Union(vec![
576+
UnionField {
577+
field_name: "a".into(),
578+
field_type: DataType::Int(None)
579+
},
580+
UnionField {
581+
field_name: "b".into(),
582+
field_type: DataType::Int(None)
583+
}
584+
]),
585+
collation: Default::default(),
586+
options: Default::default()
587+
},
588+
ColumnDef {
589+
name: "nested".into(),
590+
data_type: DataType::Union(vec![UnionField {
591+
field_name: "a".into(),
592+
field_type: DataType::Union(vec![UnionField {
593+
field_name: "b".into(),
594+
field_type: DataType::Int(None)
595+
}])
596+
}]),
597+
collation: Default::default(),
598+
options: Default::default()
599+
}
600+
],
601+
constraints: Default::default(),
602+
hive_distribution: HiveDistributionStyle::NONE,
603+
hive_formats: Some(HiveFormat {
604+
row_format: Default::default(),
605+
serde_properties: Default::default(),
606+
storage: Default::default(),
607+
location: Default::default()
608+
}),
609+
table_properties: Default::default(),
610+
with_options: Default::default(),
611+
file_format: Default::default(),
612+
location: Default::default(),
613+
query: Default::default(),
614+
without_rowid: Default::default(),
615+
like: Default::default(),
616+
clone: Default::default(),
617+
engine: Default::default(),
618+
comment: Default::default(),
619+
auto_increment_offset: Default::default(),
620+
default_charset: Default::default(),
621+
collation: Default::default(),
622+
on_commit: Default::default(),
623+
on_cluster: Default::default(),
624+
primary_key: Default::default(),
625+
order_by: Default::default(),
626+
partition_by: Default::default(),
627+
cluster_by: Default::default(),
628+
options: Default::default(),
629+
strict: Default::default(),
630+
copy_grants: Default::default(),
631+
enable_schema_evolution: Default::default(),
632+
change_tracking: Default::default(),
633+
data_retention_time_in_days: Default::default(),
634+
max_data_extension_time_in_days: Default::default(),
635+
default_ddl_collation: Default::default(),
636+
with_aggregation_policy: Default::default(),
637+
with_row_access_policy: Default::default(),
638+
with_tags: Default::default()
639+
}),
640+
stmt
641+
);
642+
}

0 commit comments

Comments
 (0)