Skip to content

Commit eb408e7

Browse files
git-hulk and lustefaniak
authored and committed
Add support of FORMAT clause for ClickHouse parser (apache#1335)
1 parent 8e86b4b commit eb408e7

10 files changed

+225
-20
lines changed

src/ast/data_type.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,6 @@ pub enum ArrayElemTypeDef {
792792
Parenthesis(Box<DataType>),
793793
}
794794

795-
796795
/// String enum values with optional integer value.
797796
///
798797
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/enum
@@ -812,9 +811,9 @@ impl fmt::Display for EnumTypeValue {
812811
Self::Name(n) => {
813812
write!(f, "'{}'", escape_single_quote_string(n))
814813
}
815-
Self::NameWithValue(n,v) => {
816-
write!(f, "'{}' = {}", escape_single_quote_string(n),v)
814+
Self::NameWithValue(n, v) => {
815+
write!(f, "'{}' = {}", escape_single_quote_string(n), v)
817816
}
818817
}
819818
}
820-
}
819+
}

src/ast/mod.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ use serde::{Deserialize, Serialize};
2929
use sqlparser_derive::{Visit, VisitMut};
3030

3131
pub use self::data_type::{
32-
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, EnumTypeValue,
32+
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumTypeValue, ExactNumberInfo,
33+
TimezoneInfo,
3334
};
3435
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
3536
pub use self::ddl::{
@@ -40,13 +41,13 @@ pub use self::ddl::{
4041
};
4142
pub use self::operator::{BinaryOperator, UnaryOperator};
4243
pub use self::query::{
43-
AggregateItem, Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, GroupByExpr,
44-
IdentWithAlias, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, LateralView,
45-
LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderBy,
46-
OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, SamplingMethod,
47-
Select, SelectInto, SelectItem, SelectionCount, SetExpr, SetOperator, SetQuantifier, Table,
48-
TableAlias, TableFactor, TableSampleSeed, TableVersion, TableWithJoins, Top, ValueTableMode,
49-
Values, WildcardAdditionalOptions, With, WithFill,
44+
AggregateItem, Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, FormatClause,
45+
GroupByExpr, IdentWithAlias, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator,
46+
LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, OffsetRows,
47+
OrderBy, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem,
48+
SamplingMethod, Select, SelectInto, SelectItem, SelectionCount, SetExpr, SetOperator,
49+
SetQuantifier, Setting, Table, TableAlias, TableFactor, TableSampleSeed, TableVersion,
50+
TableWithJoins, Top, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
5051
};
5152
pub use self::value::{
5253
escape_quoted_string, DateTimeField, DollarQuotedString, ObjectConstantKeyValue,

src/ast/query.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ pub struct Query {
4545
pub fetch: Option<Fetch>,
4646
/// `FOR { UPDATE | SHARE } [ OF table_name ] [ SKIP LOCKED | NOWAIT ]`
4747
pub locks: Vec<LockClause>,
48+
/// ClickHouse syntax: `SELECT * FROM t SETTINGS key1 = value1, key2 = value2`
49+
///
50+
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query)
51+
pub settings: Option<Vec<Setting>>,
52+
/// `SELECT * FROM t FORMAT JSONCompact`
53+
///
54+
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format)
55+
/// (ClickHouse-specific)
56+
pub format_clause: Option<FormatClause>,
4857
}
4958

5059
impl fmt::Display for Query {
@@ -80,6 +89,9 @@ impl fmt::Display for Query {
8089
if !self.locks.is_empty() {
8190
write!(f, " {}", display_separated(&self.locks, " "))?;
8291
}
92+
if let Some(ref format) = self.format_clause {
93+
write!(f, " {}", format)?;
94+
}
8395
Ok(())
8496
}
8597
}
@@ -705,6 +717,20 @@ impl fmt::Display for TableWithJoins {
705717
}
706718
}
707719

720+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
721+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
722+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
723+
pub struct Setting {
724+
pub key: WithSpan<Ident>,
725+
pub value: Value,
726+
}
727+
728+
impl fmt::Display for Setting {
729+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
730+
write!(f, "{} = {}", self.key, self.value)
731+
}
732+
}
733+
708734
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
709735
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
710736
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
@@ -1577,6 +1603,26 @@ impl fmt::Display for GroupByExpr {
15771603
}
15781604
}
15791605

1606+
/// FORMAT identifier or FORMAT NULL clause, specific to [ClickHouse].
1607+
///
1608+
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/format>
1609+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1610+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1611+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1612+
pub enum FormatClause {
1613+
Identifier(WithSpan<Ident>),
1614+
Null,
1615+
}
1616+
1617+
impl fmt::Display for FormatClause {
1618+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1619+
match self {
1620+
FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident),
1621+
FormatClause::Null => write!(f, "FORMAT NULL"),
1622+
}
1623+
}
1624+
}
1625+
15801626
/// BigQuery supports ValueTables which have 2 modes:
15811627
/// `SELECT AS STRUCT`
15821628
/// `SELECT AS VALUE`

src/keywords.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ define_keywords!(
417417
MASK,
418418
MATCH,
419419
MATCHED,
420+
MATCH_RECOGNIZE,
420421
MATERIALIZED,
421422
MAX,
422423
MAXVALUE,
@@ -528,6 +529,7 @@ define_keywords!(
528529
PRECISION,
529530
PREPARE,
530531
PRESERVE,
532+
PREWHERE,
531533
PRIMARY,
532534
PRIOR,
533535
PRIVILEGES,
@@ -804,6 +806,17 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
804806
// for Snowflake TABLESAMPLE
805807
Keyword::TABLESAMPLE,
806808
Keyword::SAMPLE,
809+
// for ClickHouse PREWHERE
810+
Keyword::PREWHERE,
811+
// for ClickHouse SELECT * FROM t SETTINGS ...
812+
Keyword::SETTINGS,
813+
// for ClickHouse SELECT * FROM t FORMAT...
814+
Keyword::FORMAT,
815+
// for Snowflake START WITH .. CONNECT BY
816+
Keyword::START,
817+
Keyword::CONNECT,
818+
// Reserved for Snowflake MATCH_RECOGNIZE
819+
Keyword::MATCH_RECOGNIZE,
807820
];
808821

809822
/// Can't be used as a column alias, so that `SELECT <expr> alias`

src/parser/mod.rs

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5902,10 +5902,14 @@ impl<'a> Parser<'a> {
59025902

59035903
/// Parse a literal integer/long
59045904
pub fn parse_literal_int(&mut self) -> Result<i64, ParserError> {
5905-
let neg = if self.consume_token(&Token::Minus) { -1 } else { 1 };
5905+
let neg = if self.consume_token(&Token::Minus) {
5906+
-1
5907+
} else {
5908+
1
5909+
};
59065910
let next_token = self.next_token();
59075911
match next_token.token {
5908-
Token::Number(s, _) => s.parse::<i64>().map(|n| n*neg).map_err(|e| {
5912+
Token::Number(s, _) => s.parse::<i64>().map(|n| n * neg).map_err(|e| {
59095913
ParserError::ParserError(format!("Could not parse '{s}' as i64: {e}"))
59105914
}),
59115915
_ => self.expected("literal int", next_token),
@@ -6307,7 +6311,7 @@ impl<'a> Parser<'a> {
63076311

63086312
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumTypeValue>, ParserError> {
63096313
self.expect_token(&Token::LParen)?;
6310-
let mut values:Vec<EnumTypeValue> = Vec::new();
6314+
let mut values: Vec<EnumTypeValue> = Vec::new();
63116315
loop {
63126316
let next_token = self.next_token();
63136317
let name = match next_token.token {
@@ -6317,7 +6321,10 @@ impl<'a> Parser<'a> {
63176321
let next_token = self.next_token();
63186322
match next_token.token {
63196323
Token::Eq => {
6320-
values.push(EnumTypeValue::NameWithValue(name, self.parse_literal_int()?));
6324+
values.push(EnumTypeValue::NameWithValue(
6325+
name,
6326+
self.parse_literal_int()?,
6327+
));
63216328
if self.consume_token(&Token::RParen) {
63226329
break;
63236330
}
@@ -6328,8 +6335,8 @@ impl<'a> Parser<'a> {
63286335
Token::Comma => values.push(EnumTypeValue::Name(name)),
63296336
Token::RParen => {
63306337
values.push(EnumTypeValue::Name(name));
6331-
break
6332-
},
6338+
break;
6339+
}
63336340
_ => self.expected(", or }", next_token)?,
63346341
}
63356342
}
@@ -6977,6 +6984,8 @@ impl<'a> Parser<'a> {
69776984
offset: None,
69786985
fetch: None,
69796986
locks: vec![],
6987+
settings: None,
6988+
format_clause: None,
69806989
})
69816990
} else if self.parse_keyword(Keyword::UPDATE) {
69826991
Ok(Query {
@@ -6988,6 +6997,8 @@ impl<'a> Parser<'a> {
69886997
offset: None,
69896998
fetch: None,
69906999
locks: vec![],
7000+
settings: None,
7001+
format_clause: None,
69917002
})
69927003
} else {
69937004
let body = self.parse_boxed_query_body(0)?;
@@ -7042,6 +7053,8 @@ impl<'a> Parser<'a> {
70427053
vec![]
70437054
};
70447055

7056+
let settings = self.parse_settings()?;
7057+
70457058
let fetch = if self.parse_keyword(Keyword::FETCH) {
70467059
Some(self.parse_fetch()?)
70477060
} else {
@@ -7052,6 +7065,18 @@ impl<'a> Parser<'a> {
70527065
while self.parse_keyword(Keyword::FOR) {
70537066
locks.push(self.parse_lock()?);
70547067
}
7068+
let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect)
7069+
&& self.parse_keyword(Keyword::FORMAT)
7070+
{
7071+
if self.parse_keyword(Keyword::NULL) {
7072+
Some(FormatClause::Null)
7073+
} else {
7074+
let ident = self.parse_identifier(false)?;
7075+
Some(FormatClause::Identifier(ident))
7076+
}
7077+
} else {
7078+
None
7079+
};
70557080

70567081
Ok(Query {
70577082
with,
@@ -7062,10 +7087,29 @@ impl<'a> Parser<'a> {
70627087
offset,
70637088
fetch,
70647089
locks,
7090+
settings,
7091+
format_clause,
70657092
})
70667093
}
70677094
}
70687095

7096+
fn parse_settings(&mut self) -> Result<Option<Vec<Setting>>, ParserError> {
7097+
let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect)
7098+
&& self.parse_keyword(Keyword::SETTINGS)
7099+
{
7100+
let key_values = self.parse_comma_separated(|p| {
7101+
let key = p.parse_identifier(false)?;
7102+
p.expect_token(&Token::Eq)?;
7103+
let value = p.parse_value()?;
7104+
Ok(Setting { key, value })
7105+
})?;
7106+
Some(key_values)
7107+
} else {
7108+
None
7109+
};
7110+
Ok(settings)
7111+
}
7112+
70697113
/// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`)
70707114
pub fn parse_cte(&mut self) -> Result<Cte, ParserError> {
70717115
let name = self.parse_identifier(false)?;
@@ -7973,6 +8017,8 @@ impl<'a> Parser<'a> {
79738017
offset: None,
79748018
fetch: None,
79758019
locks: vec![],
8020+
settings: None,
8021+
format_clause: None,
79768022
}),
79778023
alias,
79788024
})

tests/sqlparser_clickhouse.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,11 @@ fn parse_create_table_with_variant_default_expressions() {
701701
},
702702
ColumnDef {
703703
name: Ident::new("x").empty_span(),
704-
data_type: DataType::Enum8(vec![EnumTypeValue::NameWithValue("hello".to_string(), 1), EnumTypeValue::Name("world".to_string()), EnumTypeValue::NameWithValue("foo".to_string(), -3)]),
704+
data_type: DataType::Enum8(vec![
705+
EnumTypeValue::NameWithValue("hello".to_string(), 1),
706+
EnumTypeValue::Name("world".to_string()),
707+
EnumTypeValue::NameWithValue("foo".to_string(), -3)
708+
]),
705709
collation: None,
706710
codec: None,
707711
options: vec![],
@@ -1281,6 +1285,38 @@ fn parse_in_with_dangling_comma() {
12811285
);
12821286
}
12831287

1288+
#[test]
1289+
fn test_query_with_format_clause() {
1290+
let format_options = vec!["TabSeparated", "JSONCompact", "NULL"];
1291+
for format in &format_options {
1292+
let sql = format!("SELECT * FROM t FORMAT {}", format);
1293+
match clickhouse_and_generic().verified_stmt(&sql) {
1294+
Statement::Query(query) => {
1295+
if *format == "NULL" {
1296+
assert_eq!(query.format_clause, Some(FormatClause::Null));
1297+
} else {
1298+
assert_eq!(
1299+
query.format_clause,
1300+
Some(FormatClause::Identifier(Ident::new(*format).empty_span()))
1301+
);
1302+
}
1303+
}
1304+
_ => unreachable!(),
1305+
}
1306+
}
1307+
1308+
let invalid_cases = [
1309+
"SELECT * FROM t FORMAT",
1310+
"SELECT * FROM t FORMAT TabSeparated JSONCompact",
1311+
"SELECT * FROM t FORMAT TabSeparated TabSeparated",
1312+
];
1313+
for sql in &invalid_cases {
1314+
clickhouse_and_generic()
1315+
.parse_sql_statements(sql)
1316+
.expect_err("Expected: FORMAT {identifier}, found: ");
1317+
}
1318+
}
1319+
12841320
fn clickhouse() -> TestedDialects {
12851321
TestedDialects {
12861322
dialects: vec![Box::new(ClickHouseDialect {})],

0 commit comments

Comments
 (0)