diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 000000000..231252682 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: "Close stale PRs" +on: + schedule: + - cron: "30 1 * * *" + +jobs: + close-stale-prs: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v9 + with: + stale-pr-message: "Thank you for your contribution. Unfortunately, this pull request is stale because it has been open 60 days with no activity. Please remove the stale label or comment or this will be closed in 7 days." + days-before-pr-stale: 60 + days-before-pr-close: 7 + # do not close stale issues + days-before-issue-stale: -1 + days-before-issue-close: -1 + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index ab654525f..cf2d1321b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,91 @@ changes that break via addition as "Added". ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. 
+## [0.49.0] 2024-07-23 +As always, huge props to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! + +We are in the process of moving sqlparser to be governed as part of the Apache +DataFusion project: https://github.com/sqlparser-rs/sqlparser-rs/issues/1294 + +### Fixed +* Fix quoted identifier regression edge-case with "from" in SELECT (#1346) - Thanks @alexander-beedie +* Fix `AS` query clause should be after the create table options (#1339) - Thanks @git-hulk + +### Added + +* Support `MATERIALIZED`/`ALIAS`/`EPHEMERAL` default column options for ClickHouse (#1348) - Thanks @git-hulk +* Support `()` as the `GROUP BY` nothing (#1347) - Thanks @git-hulk +* Support Map literal syntax for DuckDB and Generic (#1344) - Thanks @goldmedal +* Support subquery expression in `SET` expressions (#1343) - Thanks @iffyio +* Support `WITH FILL` for ClickHouse (#1330) - Thanks @nickpresta +* Support `PARTITION BY` for PostgreSQL in `CREATE TABLE` statement (#1338) - Thanks @git-hulk +* Support of table function `WITH ORDINALITY` modifier for Postgres (#1337) - Thanks @git-hulk + + +## [0.48.0] 2024-07-09 + +Huge shout out to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! + +### Fixed +* Fix CI error message in CI (#1333) - Thanks @alamb +* Fix typo in sqlparser-derive README (#1310) - Thanks @leoyvens +* Re-enable trailing commas in DCL (#1318) - Thanks @MohamedAbdeen21 +* Fix a few typos in comment lines (#1316) - Thanks @git-hulk +* Fix Snowflake `SELECT * wildcard REPLACE ... 
RENAME` order (#1321) - Thanks @alexander-beedie +* Allow semi-colon at the end of UNCACHE statement (#1320) - Thanks @LorrensP-2158466 +* Return errors, not panic, when integers fail to parse in `AUTO_INCREMENT` and `TOP` (#1305) - Thanks @eejbyfeldt + +### Added +* Support `OWNER TO` clause in Postgres (#1314) - Thanks @gainings +* Support `FORMAT` clause for ClickHouse (#1335) - Thanks @git-hulk +* Support `DROP PROCEDURE` statement (#1324) - Thanks @LorrensP-2158466 +* Support `PREWHERE` condition for ClickHouse dialect (#1328) - Thanks @git-hulk +* Support `SETTINGS` pairs for ClickHouse dialect (#1327) - Thanks @git-hulk +* Support `GROUP BY WITH MODIFIER` for ClickHouse dialect (#1323) - Thanks @git-hulk +* Support DuckDB Union datatype (#1322) - Thanks @gstvg +* Support parametric arguments to `FUNCTION` for ClickHouse dialect (#1315) - Thanks @git-hulk +* Support `TO` in `CREATE VIEW` clause for ClickHouse (#1313) - Thanks @Bidaya0 +* Support `UPDATE` statements that contain tuple assignments (#1317) - Thanks @lovasoa +* Support `BY NAME` quantifier across all set ops (#1309) - Thanks @alexander-beedie +* Support Snowflake exclusive `CREATE TABLE` options (#1233) - Thanks @balliegojr +* Support ClickHouse `CREATE TABLE` with primary key and parametrised table engine (#1289) - Thanks @7phs +* Support custom operators in Postgres (#1302) - Thanks @lovasoa +* Support ClickHouse data types (#1285) - Thanks @7phs + +### Changed +* Add stale PR github workflow (#1331) - Thanks @alamb +* Refine docs (#1326) - Thanks @emilsivervik +* Improve error messages with additional colons (#1319) - Thanks @LorrensP-2158466 +* Move Display fmt to struct for `CreateIndex` (#1307) - Thanks @philipcristiano +* Enhancing Trailing Comma Option (#1212) - Thanks @MohamedAbdeen21 +* Encapsulate `CreateTable`, `CreateIndex` into specific structs (#1291) - Thanks @philipcristiano + +## [0.47.0] 2024-06-01 + +### Fixed +* Re-support Postgres array slice syntax (#1290) - Thanks @jmhain 
+* Fix DoubleColon cast skipping AT TIME ZONE #1266 (#1267) - Thanks @dmitrybugakov +* Fix for values as table name in Databricks and generic (#1278) - Thanks @jmhain + +### Added +* Support `ASOF` joins in Snowflake (#1288) - Thanks @jmhain +* Support `CREATE VIEW` with fields and data types ClickHouse (#1292) - Thanks @7phs +* Support view comments for Snowflake (#1287) - Thanks @bombsimon +* Support dynamic pivot in Snowflake (#1280) - Thanks @jmhain +* Support `CREATE FUNCTION` for BigQuery, generalize AST (#1253) - Thanks @iffyio +* Support expression in `AT TIME ZONE` and fix precedence (#1272) - Thanks @jmhain +* Support `IGNORE/RESPECT NULLS` inside function argument list for Databricks (#1263) - Thanks @jmhain +* Support `SELECT * EXCEPT` Databricks (#1261) - Thanks @jmhain +* Support triple quoted strings (#1262) - Thanks @iffyio +* Support array indexing for duckdb (#1265) - Thanks @JichaoS +* Support multiple SET variables (#1252) - Thanks @iffyio +* Support `ANY_VALUE` `HAVING` clause (#1258) in BigQuery - Thanks @jmhain +* Support keywords as field names in BigQuery struct syntax (#1254) - Thanks @iffyio +* Support `GROUP_CONCAT()` in MySQL (#1256) - Thanks @jmhain +* Support lambda functions in Databricks (#1257) - Thanks @jmhain +* Add const generic peek_tokens method to parser (#1255) - Thanks @jmhain + + ## [0.46.0] 2024-05-03 ### Changed diff --git a/Cargo.toml b/Cargo.toml index c9bf58bbb..4c510a8c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.46.0" +version = "0.49.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" diff --git a/README.md b/README.md index 512f5f6c0..3226b9549 100644 --- a/README.md +++ b/README.md @@ -114,13 +114,12 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users -This 
parser is currently being used by the [DataFusion] query engine, -[LocustDB], [Ballista], [GlueSQL], [Opteryx], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. +This parser is currently being used by the [DataFusion] query engine, [LocustDB], +[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. - ## Design The core expression parser uses the [Pratt Parser] design, which is a top-down @@ -210,6 +209,7 @@ licensed as above, without any additional terms or conditions. [Ballista]: https://github.com/apache/arrow-ballista [GlueSQL]: https://github.com/gluesql/gluesql [Opteryx]: https://github.com/mabel-dev/opteryx +[Polars]: https://pola.rs/ [PRQL]: https://github.com/PRQL/prql [Qrlew]: https://github.com/Qrlew/qrlew [JumpWire]: https://github.com/extragoodlabs/jumpwire diff --git a/derive/README.md b/derive/README.md index ad4978a89..ffb5d266e 100644 --- a/derive/README.md +++ b/derive/README.md @@ -97,7 +97,7 @@ impl Visit for TableFactor { match self { Self::Table { name, alias } => { visitor.pre_visit_relation(name)?; - alias.visit(name)?; + name.visit(visitor)?; visitor.post_visit_relation(name)?; alias.visit(visitor)?; } diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index d71900bff..e6477f56b 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -20,9 +20,9 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::{display_comma_separated, ObjectName, StructField}; +use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField}; -use super::value::escape_single_quote_string; +use super::{value::escape_single_quote_string, ColumnDef}; /// SQL data types #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -129,37 +129,95 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Int4(Option), - /// 
Integer type in [bigquery] + /// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse] + /// Note: Int8 mean 8 bytes in [postgresql] (not 8 bits) + /// Int8 with optional display width e.g. INT8 or INT8(11) + /// Note: Int8 mean 8 bits in [clickhouse] + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int8(Option), + /// Integer type in [clickhouse] + /// Note: Int16 mean 16 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int16, + /// Integer type in [clickhouse] + /// Note: Int32 mean 32 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int32, + /// Integer type in [bigquery], [clickhouse] /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int64, + /// Integer type in [clickhouse] + /// Note: Int128 mean 128 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int128, + /// Integer type in [clickhouse] + /// Note: Int256 mean 256 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int256, /// Integer with optional display width e.g. INTEGER or INTEGER(11) Integer(Option), /// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED UnsignedInt(Option), /// Unsigned int4 with optional display width e.g. INT4 UNSIGNED or INT4(11) UNSIGNED UnsignedInt4(Option), - /// Unsigned integer with optional display width e.g. INTGER UNSIGNED or INTEGER(11) UNSIGNED + /// Unsigned integer with optional display width e.g. 
INTEGER UNSIGNED or INTEGER(11) UNSIGNED UnsignedInteger(Option), + /// Unsigned integer type in [clickhouse] + /// Note: UInt8 mean 8 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt8, + /// Unsigned integer type in [clickhouse] + /// Note: UInt16 mean 16 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt16, + /// Unsigned integer type in [clickhouse] + /// Note: UInt32 mean 32 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt32, + /// Unsigned integer type in [clickhouse] + /// Note: UInt64 mean 64 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt64, + /// Unsigned integer type in [clickhouse] + /// Note: UInt128 mean 128 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt128, + /// Unsigned integer type in [clickhouse] + /// Note: UInt256 mean 256 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt256, /// Big integer with optional display width e.g. BIGINT or BIGINT(20) BigInt(Option), /// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED UnsignedBigInt(Option), - /// Int8 as alias for Bigint in [postgresql] - /// Note: Int8 mean 8 bytes in postgres (not 8 bits) - /// Int8 with optional display width e.g. INT8 or INT8(11) - /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html - Int8(Option), /// Unsigned Int8 with optional display width e.g. 
INT8 UNSIGNED or INT8(11) UNSIGNED UnsignedInt8(Option), /// Float4 as alias for Real in [postgresql] /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Float4, + /// Floating point in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float + Float32, /// Floating point in [bigquery] /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float Float64, /// Floating point e.g. REAL Real, @@ -182,6 +240,10 @@ pub enum DataType { Boolean, /// Date Date, + /// Date32 with the same range as Datetime64 + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32 + Date32, /// Time with optional time precision and time zone information e.g. [standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type @@ -190,6 +252,10 @@ pub enum DataType { /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html Datetime(Option), + /// Datetime with time precision and optional timezone e.g. [ClickHouse][1]. + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + Datetime64(u64, Option), /// Timestamp with optional time precision and time zone information e.g. [standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type @@ -206,12 +272,28 @@ pub enum DataType { Text, /// String with optional length. String(Option), + /// A fixed-length string e.g [ClickHouse][1]. 
+ /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring + FixedString(u64), /// Bytea Bytea, /// Custom type such as enums Custom(ObjectName, Vec), /// Arrays Array(ArrayElemTypeDef), + /// Map + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map + Map(Box, Box), + /// Tuple + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + Tuple(Vec), + /// Nested + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested + Nested(Vec), /// Enums Enum(Vec), /// Set @@ -221,6 +303,18 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec), + /// Union + /// + /// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html + Union(Vec), + /// Nullable - special marker NULL represents in ClickHouse as a data type. + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable + Nullable(Box), + /// LowCardinality - changes the internal representation of other data types to be dictionary-encoded. + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality + LowCardinality(Box), /// No type specified - only used with /// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such /// as `CREATE TABLE t1 (a)`. 
@@ -296,9 +390,24 @@ impl fmt::Display for DataType { DataType::Int4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, false) } + DataType::Int8(zerofill) => { + format_type_with_optional_length(f, "INT8", zerofill, false) + } + DataType::Int16 => { + write!(f, "Int16") + } + DataType::Int32 => { + write!(f, "Int32") + } DataType::Int64 => { write!(f, "INT64") } + DataType::Int128 => { + write!(f, "Int128") + } + DataType::Int256 => { + write!(f, "Int256") + } DataType::UnsignedInt4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } @@ -314,14 +423,30 @@ impl fmt::Display for DataType { DataType::UnsignedBigInt(zerofill) => { format_type_with_optional_length(f, "BIGINT", zerofill, true) } - DataType::Int8(zerofill) => { - format_type_with_optional_length(f, "INT8", zerofill, false) - } DataType::UnsignedInt8(zerofill) => { format_type_with_optional_length(f, "INT8", zerofill, true) } + DataType::UInt8 => { + write!(f, "UInt8") + } + DataType::UInt16 => { + write!(f, "UInt16") + } + DataType::UInt32 => { + write!(f, "UInt32") + } + DataType::UInt64 => { + write!(f, "UInt64") + } + DataType::UInt128 => { + write!(f, "UInt128") + } + DataType::UInt256 => { + write!(f, "UInt256") + } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), + DataType::Float32 => write!(f, "Float32"), DataType::Float64 => write!(f, "FLOAT64"), DataType::Double => write!(f, "DOUBLE"), DataType::Float8 => write!(f, "FLOAT8"), @@ -329,6 +454,7 @@ impl fmt::Display for DataType { DataType::Bool => write!(f, "BOOL"), DataType::Boolean => write!(f, "BOOLEAN"), DataType::Date => write!(f, "DATE"), + DataType::Date32 => write!(f, "Date32"), DataType::Time(precision, timezone_info) => { format_datetime_precision_and_tz(f, "TIME", precision, timezone_info) } @@ -338,6 +464,14 @@ impl fmt::Display for DataType { DataType::Timestamp(precision, timezone_info) => { format_datetime_precision_and_tz(f, "TIMESTAMP", precision, 
timezone_info) } + DataType::Datetime64(precision, timezone) => { + format_clickhouse_datetime_precision_and_timezone( + f, + "DateTime64", + precision, + timezone, + ) + } DataType::Interval => write!(f, "INTERVAL"), DataType::JSON => write!(f, "JSON"), DataType::JSONB => write!(f, "JSONB"), @@ -350,6 +484,7 @@ impl fmt::Display for DataType { ArrayElemTypeDef::SquareBracket(t, None) => write!(f, "{t}[]"), ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"), ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"), + ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"), }, DataType::Custom(ty, modifiers) => { if modifiers.is_empty() { @@ -385,6 +520,28 @@ impl fmt::Display for DataType { write!(f, "STRUCT") } } + DataType::Union(fields) => { + write!(f, "UNION({})", display_comma_separated(fields)) + } + // ClickHouse + DataType::Nullable(data_type) => { + write!(f, "Nullable({})", data_type) + } + DataType::FixedString(character_length) => { + write!(f, "FixedString({})", character_length) + } + DataType::LowCardinality(data_type) => { + write!(f, "LowCardinality({})", data_type) + } + DataType::Map(key_data_type, value_data_type) => { + write!(f, "Map({}, {})", key_data_type, value_data_type) + } + DataType::Tuple(fields) => { + write!(f, "Tuple({})", display_comma_separated(fields)) + } + DataType::Nested(fields) => { + write!(f, "Nested({})", display_comma_separated(fields)) + } DataType::Unspecified => Ok(()), } } @@ -439,6 +596,23 @@ fn format_datetime_precision_and_tz( Ok(()) } +fn format_clickhouse_datetime_precision_and_timezone( + f: &mut fmt::Formatter, + sql_type: &'static str, + len: &u64, + time_zone: &Option, +) -> fmt::Result { + write!(f, "{sql_type}({len}")?; + + if let Some(time_zone) = time_zone { + write!(f, ", '{time_zone}'")?; + } + + write!(f, ")")?; + + Ok(()) +} + /// Timestamp and Time data types information about TimeZone formatting. 
/// /// This is more related to a display information than real differences between each variant. To @@ -532,7 +706,7 @@ pub enum CharacterLength { /// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly unit: Option, }, - /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Miscrosoft SQL Server) + /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server) Max, } @@ -593,4 +767,6 @@ pub enum ArrayElemTypeDef { AngleBracket(Box), /// `INT[]` or `INT[2]` SquareBracket(Box, Option), + /// `Array(Int64)` + Parenthesis(Box), } diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index de514550b..5cc671cf5 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -157,6 +157,32 @@ pub enum AlterTableOperation { SwapWith { table_name: ObjectName }, /// 'SET TBLPROPERTIES ( { property_key [ = ] property_val } [, ...] )' SetTblProperties { table_properties: Vec }, + + /// `OWNER TO { | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + /// + /// Note: this is PostgreSQL-specific + OwnerTo { new_owner: Owner }, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Owner { + Ident(Ident), + CurrentRole, + CurrentUser, + SessionUser, +} + +impl fmt::Display for Owner { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Owner::Ident(ident) => write!(f, "{}", ident), + Owner::CurrentRole => write!(f, "CURRENT_ROLE"), + Owner::CurrentUser => write!(f, "CURRENT_USER"), + Owner::SessionUser => write!(f, "SESSION_USER"), + } + } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -322,6 +348,9 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::SwapWith { table_name } => { write!(f, "SWAP WITH {table_name}") } + AlterTableOperation::OwnerTo { new_owner } => { + write!(f, "OWNER TO {new_owner}") + } AlterTableOperation::SetTblProperties { table_properties } => { 
write!( f, @@ -815,7 +844,7 @@ impl fmt::Display for ColumnDef { /// /// Syntax /// ```markdown -/// [OPTIONS(option, ...)] +/// [data_type][OPTIONS(option, ...)] /// /// option: = /// ``` @@ -824,18 +853,23 @@ impl fmt::Display for ColumnDef { /// ```sql /// name /// age OPTIONS(description = "age column", tag = "prod") +/// created_at DateTime64 /// ``` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ViewColumnDef { pub name: Ident, + pub data_type: Option, pub options: Option>, } impl fmt::Display for ViewColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.name)?; + if let Some(data_type) = self.data_type.as_ref() { + write!(f, " {}", data_type)?; + } if let Some(options) = self.options.as_ref() { write!( f, @@ -889,6 +923,18 @@ pub enum ColumnOption { NotNull, /// `DEFAULT ` Default(Expr), + + /// ClickHouse supports `MATERIALIZED`, `EPHEMERAL` and `ALIAS` expr to generate default values. 
+ /// Syntax: `b INT MATERIALIZED (a + 1)` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/create/table#default_values) + + /// `MATERIALIZED ` + Materialized(Expr), + /// `EPHEMERAL []` + Ephemeral(Option), + /// `ALIAS ` + Alias(Expr), + /// `{ PRIMARY KEY | UNIQUE } []` Unique { is_primary: bool, @@ -944,6 +990,15 @@ impl fmt::Display for ColumnOption { Null => write!(f, "NULL"), NotNull => write!(f, "NOT NULL"), Default(expr) => write!(f, "DEFAULT {expr}"), + Materialized(expr) => write!(f, "MATERIALIZED {expr}"), + Ephemeral(expr) => { + if let Some(e) = expr { + write!(f, "EPHEMERAL {e}") + } else { + write!(f, "EPHEMERAL") + } + } + Alias(expr) => write!(f, "ALIAS {expr}"), Unique { is_primary, characteristics, diff --git a/src/ast/dml.rs b/src/ast/dml.rs index badc58a7d..0ebbaa3e9 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -11,18 +11,439 @@ // limitations under the License. #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, vec::Vec}; +use alloc::{boxed::Box, string::String, vec::Vec}; +use core::fmt::{self, Display}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; +pub use super::ddl::{ColumnDef, TableConstraint}; + use super::{ - Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, OrderByExpr, - Query, SelectItem, SqliteOnConflict, TableWithJoins, + display_comma_separated, display_separated, CommentDef, Expr, FileFormat, FromTable, + HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, InsertAliases, + MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens, OrderByExpr, Query, + RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine, TableWithJoins, Tag, + WrappedCollection, }; +/// CREATE INDEX statement. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateIndex { + /// index name + pub name: Option, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + pub using: Option, + pub columns: Vec, + pub unique: bool, + pub concurrently: bool, + pub if_not_exists: bool, + pub include: Vec, + pub nulls_distinct: Option, + pub predicate: Option, +} + +impl Display for CreateIndex { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CREATE {unique}INDEX {concurrently}{if_not_exists}", + unique = if self.unique { "UNIQUE " } else { "" }, + concurrently = if self.concurrently { + "CONCURRENTLY " + } else { + "" + }, + if_not_exists = if self.if_not_exists { + "IF NOT EXISTS " + } else { + "" + }, + )?; + if let Some(value) = &self.name { + write!(f, "{value} ")?; + } + write!(f, "ON {}", self.table_name)?; + if let Some(value) = &self.using { + write!(f, " USING {value} ")?; + } + write!(f, "({})", display_separated(&self.columns, ","))?; + if !self.include.is_empty() { + write!(f, " INCLUDE ({})", display_separated(&self.include, ","))?; + } + if let Some(value) = self.nulls_distinct { + if value { + write!(f, " NULLS DISTINCT")?; + } else { + write!(f, " NULLS NOT DISTINCT")?; + } + } + if let Some(predicate) = &self.predicate { + write!(f, " WHERE {predicate}")?; + } + Ok(()) + } +} + +/// CREATE TABLE statement. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateTable { + pub or_replace: bool, + pub temporary: bool, + pub external: bool, + pub global: Option, + pub if_not_exists: bool, + pub transient: bool, + pub volatile: bool, + /// Table name + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub name: ObjectName, + /// Optional schema + pub columns: Vec, + pub constraints: Vec, + pub hive_distribution: HiveDistributionStyle, + pub hive_formats: Option, + pub table_properties: Vec, + pub with_options: Vec, + pub file_format: Option, + pub location: Option, + pub query: Option>, + pub without_rowid: bool, + pub like: Option, + pub clone: Option, + pub engine: Option, + pub comment: Option, + pub auto_increment_offset: Option, + pub default_charset: Option, + pub collation: Option, + pub on_commit: Option, + /// ClickHouse "ON CLUSTER" clause: + /// + pub on_cluster: Option, + /// ClickHouse "PRIMARY KEY " clause. + /// + pub primary_key: Option>, + /// ClickHouse "ORDER BY " clause. Note that omitted ORDER BY is different + /// than empty (represented as ()), the latter meaning "no sorting". + /// + pub order_by: Option>, + /// BigQuery: A partition expression for the table. + /// + pub partition_by: Option>, + /// BigQuery: Table clustering column list. + /// + pub cluster_by: Option>>, + /// BigQuery: Table options list. + /// + pub options: Option>, + /// SQLite "STRICT" clause. + /// if the "STRICT" table-option keyword is added to the end, after the closing ")", + /// then strict typing rules apply to that table. 
+ pub strict: bool, + /// Snowflake "COPY GRANTS" clause + /// + pub copy_grants: bool, + /// Snowflake "ENABLE_SCHEMA_EVOLUTION" clause + /// + pub enable_schema_evolution: Option, + /// Snowflake "CHANGE_TRACKING" clause + /// + pub change_tracking: Option, + /// Snowflake "DATA_RETENTION_TIME_IN_DAYS" clause + /// + pub data_retention_time_in_days: Option, + /// Snowflake "MAX_DATA_EXTENSION_TIME_IN_DAYS" clause + /// + pub max_data_extension_time_in_days: Option, + /// Snowflake "DEFAULT_DDL_COLLATION" clause + /// + pub default_ddl_collation: Option, + /// Snowflake "WITH AGGREGATION POLICY" clause + /// + pub with_aggregation_policy: Option, + /// Snowflake "WITH ROW ACCESS POLICY" clause + /// + pub with_row_access_policy: Option, + /// Snowflake "WITH TAG" clause + /// + pub with_tags: Option>, +} + +impl Display for CreateTable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // We want to allow the following options + // Empty column list, allowed by PostgreSQL: + // `CREATE TABLE t ()` + // No columns provided for CREATE TABLE AS: + // `CREATE TABLE t AS SELECT a from t2` + // Columns provided for CREATE TABLE AS: + // `CREATE TABLE t (a INT) AS SELECT a from t2` + write!( + f, + "CREATE {or_replace}{external}{global}{temporary}{transient}{volatile}TABLE {if_not_exists}{name}", + or_replace = if self.or_replace { "OR REPLACE " } else { "" }, + external = if self.external { "EXTERNAL " } else { "" }, + global = self.global + .map(|global| { + if global { + "GLOBAL " + } else { + "LOCAL " + } + }) + .unwrap_or(""), + if_not_exists = if self.if_not_exists { "IF NOT EXISTS " } else { "" }, + temporary = if self.temporary { "TEMPORARY " } else { "" }, + transient = if self.transient { "TRANSIENT " } else { "" }, + volatile = if self.volatile { "VOLATILE " } else { "" }, + name = self.name, + )?; + if let Some(on_cluster) = &self.on_cluster { + write!( + f, + " ON CLUSTER {}", + on_cluster.replace('{', "'{").replace('}', "}'") + )?; + } + if 
!self.columns.is_empty() || !self.constraints.is_empty() { + write!(f, " ({}", display_comma_separated(&self.columns))?; + if !self.columns.is_empty() && !self.constraints.is_empty() { + write!(f, ", ")?; + } + write!(f, "{})", display_comma_separated(&self.constraints))?; + } else if self.query.is_none() && self.like.is_none() && self.clone.is_none() { + // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens + write!(f, " ()")?; + } + // Only for SQLite + if self.without_rowid { + write!(f, " WITHOUT ROWID")?; + } + + // Only for Hive + if let Some(l) = &self.like { + write!(f, " LIKE {l}")?; + } + + if let Some(c) = &self.clone { + write!(f, " CLONE {c}")?; + } + + match &self.hive_distribution { + HiveDistributionStyle::PARTITIONED { columns } => { + write!(f, " PARTITIONED BY ({})", display_comma_separated(columns))?; + } + HiveDistributionStyle::CLUSTERED { + columns, + sorted_by, + num_buckets, + } => { + write!(f, " CLUSTERED BY ({})", display_comma_separated(columns))?; + if !sorted_by.is_empty() { + write!(f, " SORTED BY ({})", display_comma_separated(sorted_by))?; + } + if *num_buckets > 0 { + write!(f, " INTO {num_buckets} BUCKETS")?; + } + } + HiveDistributionStyle::SKEWED { + columns, + on, + stored_as_directories, + } => { + write!( + f, + " SKEWED BY ({}) ON ({})", + display_comma_separated(columns), + display_comma_separated(on) + )?; + if *stored_as_directories { + write!(f, " STORED AS DIRECTORIES")?; + } + } + _ => (), + } + + if let Some(HiveFormat { + row_format, + serde_properties, + storage, + location, + }) = &self.hive_formats + { + match row_format { + Some(HiveRowFormat::SERDE { class }) => write!(f, " ROW FORMAT SERDE '{class}'")?, + Some(HiveRowFormat::DELIMITED { delimiters }) => { + write!(f, " ROW FORMAT DELIMITED")?; + if !delimiters.is_empty() { + write!(f, " {}", display_separated(delimiters, " "))?; + } + } + None => (), + } + match storage { + Some(HiveIOFormat::IOF { + input_format, + output_format, + }) => 
write!( + f, + " STORED AS INPUTFORMAT {input_format} OUTPUTFORMAT {output_format}" + )?, + Some(HiveIOFormat::FileFormat { format }) if !self.external => { + write!(f, " STORED AS {format}")? + } + _ => (), + } + if let Some(serde_properties) = serde_properties.as_ref() { + write!( + f, + " WITH SERDEPROPERTIES ({})", + display_comma_separated(serde_properties) + )?; + } + if !self.external { + if let Some(loc) = location { + write!(f, " LOCATION '{loc}'")?; + } + } + } + if self.external { + if let Some(file_format) = self.file_format { + write!(f, " STORED AS {file_format}")?; + } + write!(f, " LOCATION '{}'", self.location.as_ref().unwrap())?; + } + if !self.table_properties.is_empty() { + write!( + f, + " TBLPROPERTIES ({})", + display_comma_separated(&self.table_properties) + )?; + } + if !self.with_options.is_empty() { + write!(f, " WITH ({})", display_comma_separated(&self.with_options))?; + } + if let Some(engine) = &self.engine { + write!(f, " ENGINE={engine}")?; + } + if let Some(comment_def) = &self.comment { + match comment_def { + CommentDef::WithEq(comment) => { + write!(f, " COMMENT = '{comment}'")?; + } + CommentDef::WithoutEq(comment) => { + write!(f, " COMMENT '{comment}'")?; + } + } + } + + if let Some(auto_increment_offset) = self.auto_increment_offset { + write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; + } + if let Some(primary_key) = &self.primary_key { + write!(f, " PRIMARY KEY {}", primary_key)?; + } + if let Some(order_by) = &self.order_by { + write!(f, " ORDER BY {}", order_by)?; + } + if let Some(partition_by) = self.partition_by.as_ref() { + write!(f, " PARTITION BY {partition_by}")?; + } + if let Some(cluster_by) = self.cluster_by.as_ref() { + write!(f, " CLUSTER BY {cluster_by}")?; + } + + if let Some(options) = self.options.as_ref() { + write!( + f, + " OPTIONS({})", + display_comma_separated(options.as_slice()) + )?; + } + + if self.copy_grants { + write!(f, " COPY GRANTS")?; + } + + if let Some(is_enabled) = 
self.enable_schema_evolution { + write!( + f, + " ENABLE_SCHEMA_EVOLUTION={}", + if is_enabled { "TRUE" } else { "FALSE" } + )?; + } + + if let Some(is_enabled) = self.change_tracking { + write!( + f, + " CHANGE_TRACKING={}", + if is_enabled { "TRUE" } else { "FALSE" } + )?; + } + + if let Some(data_retention_time_in_days) = self.data_retention_time_in_days { + write!( + f, + " DATA_RETENTION_TIME_IN_DAYS={data_retention_time_in_days}", + )?; + } + + if let Some(max_data_extension_time_in_days) = self.max_data_extension_time_in_days { + write!( + f, + " MAX_DATA_EXTENSION_TIME_IN_DAYS={max_data_extension_time_in_days}", + )?; + } + + if let Some(default_ddl_collation) = &self.default_ddl_collation { + write!(f, " DEFAULT_DDL_COLLATION='{default_ddl_collation}'",)?; + } + + if let Some(with_aggregation_policy) = &self.with_aggregation_policy { + write!(f, " WITH AGGREGATION POLICY {with_aggregation_policy}",)?; + } + + if let Some(row_access_policy) = &self.with_row_access_policy { + write!(f, " {row_access_policy}",)?; + } + + if let Some(tag) = &self.with_tags { + write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; + } + + if let Some(default_charset) = &self.default_charset { + write!(f, " DEFAULT CHARSET={default_charset}")?; + } + if let Some(collation) = &self.collation { + write!(f, " COLLATE={collation}")?; + } + + if self.on_commit.is_some() { + let on_commit = match self.on_commit { + Some(OnCommit::DeleteRows) => "ON COMMIT DELETE ROWS", + Some(OnCommit::PreserveRows) => "ON COMMIT PRESERVE ROWS", + Some(OnCommit::Drop) => "ON COMMIT DROP", + None => "", + }; + write!(f, " {on_commit}")?; + } + if self.strict { + write!(f, " STRICT")?; + } + if let Some(query) = &self.query { + write!(f, " AS {query}")?; + } + Ok(()) + } +} + /// INSERT statement. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 126542379..92c75e6a4 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -7,9 +7,11 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; +use super::super::dml::CreateTable; use crate::ast::{ - ColumnDef, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, OnCommit, - Query, SqlOption, Statement, TableConstraint, + ColumnDef, CommentDef, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, + OnCommit, OneOrManyWithParens, Query, RowAccessPolicy, SqlOption, Statement, TableConstraint, + TableEngine, Tag, WrappedCollection, }; use crate::parser::ParserError; @@ -51,6 +53,7 @@ pub struct CreateTableBuilder { pub global: Option, pub if_not_exists: bool, pub transient: bool, + pub volatile: bool, pub name: ObjectName, pub columns: Vec, pub constraints: Vec, @@ -64,18 +67,28 @@ pub struct CreateTableBuilder { pub without_rowid: bool, pub like: Option, pub clone: Option, - pub engine: Option, - pub comment: Option, + pub engine: Option, + pub comment: Option, pub auto_increment_offset: Option, pub default_charset: Option, pub collation: Option, pub on_commit: Option, pub on_cluster: Option, - pub order_by: Option>, + pub primary_key: Option>, + pub order_by: Option>, pub partition_by: Option>, - pub cluster_by: Option>, + pub cluster_by: Option>>, pub options: Option>, pub strict: bool, + pub copy_grants: bool, + pub enable_schema_evolution: Option, + pub change_tracking: Option, + pub data_retention_time_in_days: Option, + pub max_data_extension_time_in_days: Option, + pub default_ddl_collation: Option, + pub with_aggregation_policy: Option, + pub with_row_access_policy: Option, + pub with_tags: Option>, } impl 
CreateTableBuilder { @@ -87,6 +100,7 @@ impl CreateTableBuilder { global: None, if_not_exists: false, transient: false, + volatile: false, name, columns: vec![], constraints: vec![], @@ -107,11 +121,21 @@ impl CreateTableBuilder { collation: None, on_commit: None, on_cluster: None, + primary_key: None, order_by: None, partition_by: None, cluster_by: None, options: None, strict: false, + copy_grants: false, + enable_schema_evolution: None, + change_tracking: None, + data_retention_time_in_days: None, + max_data_extension_time_in_days: None, + default_ddl_collation: None, + with_aggregation_policy: None, + with_row_access_policy: None, + with_tags: None, } } pub fn or_replace(mut self, or_replace: bool) -> Self { @@ -144,6 +168,11 @@ impl CreateTableBuilder { self } + pub fn volatile(mut self, volatile: bool) -> Self { + self.volatile = volatile; + self + } + pub fn columns(mut self, columns: Vec) -> Self { self.columns = columns; self @@ -202,12 +231,12 @@ impl CreateTableBuilder { self } - pub fn engine(mut self, engine: Option) -> Self { + pub fn engine(mut self, engine: Option) -> Self { self.engine = engine; self } - pub fn comment(mut self, comment: Option) -> Self { + pub fn comment(mut self, comment: Option) -> Self { self.comment = comment; self } @@ -237,7 +266,12 @@ impl CreateTableBuilder { self } - pub fn order_by(mut self, order_by: Option>) -> Self { + pub fn primary_key(mut self, primary_key: Option>) -> Self { + self.primary_key = primary_key; + self + } + + pub fn order_by(mut self, order_by: Option>) -> Self { self.order_by = order_by; self } @@ -247,7 +281,7 @@ impl CreateTableBuilder { self } - pub fn cluster_by(mut self, cluster_by: Option>) -> Self { + pub fn cluster_by(mut self, cluster_by: Option>>) -> Self { self.cluster_by = cluster_by; self } @@ -262,14 +296,66 @@ impl CreateTableBuilder { self } + pub fn copy_grants(mut self, copy_grants: bool) -> Self { + self.copy_grants = copy_grants; + self + } + + pub fn enable_schema_evolution(mut 
self, enable_schema_evolution: Option) -> Self { + self.enable_schema_evolution = enable_schema_evolution; + self + } + + pub fn change_tracking(mut self, change_tracking: Option) -> Self { + self.change_tracking = change_tracking; + self + } + + pub fn data_retention_time_in_days(mut self, data_retention_time_in_days: Option) -> Self { + self.data_retention_time_in_days = data_retention_time_in_days; + self + } + + pub fn max_data_extension_time_in_days( + mut self, + max_data_extension_time_in_days: Option, + ) -> Self { + self.max_data_extension_time_in_days = max_data_extension_time_in_days; + self + } + + pub fn default_ddl_collation(mut self, default_ddl_collation: Option) -> Self { + self.default_ddl_collation = default_ddl_collation; + self + } + + pub fn with_aggregation_policy(mut self, with_aggregation_policy: Option) -> Self { + self.with_aggregation_policy = with_aggregation_policy; + self + } + + pub fn with_row_access_policy( + mut self, + with_row_access_policy: Option, + ) -> Self { + self.with_row_access_policy = with_row_access_policy; + self + } + + pub fn with_tags(mut self, with_tags: Option>) -> Self { + self.with_tags = with_tags; + self + } + pub fn build(self) -> Statement { - Statement::CreateTable { + Statement::CreateTable(CreateTable { or_replace: self.or_replace, temporary: self.temporary, external: self.external, global: self.global, if_not_exists: self.if_not_exists, transient: self.transient, + volatile: self.volatile, name: self.name, columns: self.columns, constraints: self.constraints, @@ -290,12 +376,22 @@ impl CreateTableBuilder { collation: self.collation, on_commit: self.on_commit, on_cluster: self.on_cluster, + primary_key: self.primary_key, order_by: self.order_by, partition_by: self.partition_by, cluster_by: self.cluster_by, options: self.options, strict: self.strict, - } + copy_grants: self.copy_grants, + enable_schema_evolution: self.enable_schema_evolution, + change_tracking: self.change_tracking, + 
data_retention_time_in_days: self.data_retention_time_in_days, + max_data_extension_time_in_days: self.max_data_extension_time_in_days, + default_ddl_collation: self.default_ddl_collation, + with_aggregation_policy: self.with_aggregation_policy, + with_row_access_policy: self.with_row_access_policy, + with_tags: self.with_tags, + }) } } @@ -306,13 +402,14 @@ impl TryFrom for CreateTableBuilder { // ownership. fn try_from(stmt: Statement) -> Result { match stmt { - Statement::CreateTable { + Statement::CreateTable(CreateTable { or_replace, temporary, external, global, if_not_exists, transient, + volatile, name, columns, constraints, @@ -333,12 +430,22 @@ impl TryFrom for CreateTableBuilder { collation, on_commit, on_cluster, + primary_key, order_by, partition_by, cluster_by, options, strict, - } => Ok(Self { + copy_grants, + enable_schema_evolution, + change_tracking, + data_retention_time_in_days, + max_data_extension_time_in_days, + default_ddl_collation, + with_aggregation_policy, + with_row_access_policy, + with_tags, + }) => Ok(Self { or_replace, temporary, external, @@ -365,11 +472,22 @@ impl TryFrom for CreateTableBuilder { collation, on_commit, on_cluster, + primary_key, order_by, partition_by, cluster_by, options, strict, + copy_grants, + enable_schema_evolution, + change_tracking, + data_retention_time_in_days, + max_data_extension_time_in_days, + default_ddl_collation, + with_aggregation_policy, + with_row_access_policy, + with_tags, + volatile, }), _ => Err(ParserError::ParserError(format!( "Expected create table statement, but received: {stmt}" @@ -378,11 +496,11 @@ impl TryFrom for CreateTableBuilder { } } -/// Helper return type when parsing configuration for a BigQuery `CREATE TABLE` statement. +/// Helper return type when parsing configuration for a `CREATE TABLE` statement. 
#[derive(Default)] -pub(crate) struct BigQueryTableConfiguration { +pub(crate) struct CreateTableConfiguration { pub partition_by: Option>, - pub cluster_by: Option>, + pub cluster_by: Option>>, pub options: Option>, } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d937b7275..6d40995fd 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -34,22 +34,24 @@ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue} pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs, - GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, - ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, + GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition, + ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, }; -pub use self::dml::{Delete, Insert}; +pub use self::dml::{CreateIndex, CreateTable, Delete, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, JoinOperator, - JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, - MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, - NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, RepetitionQuantifier, - ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, - SetOperator, SetQuantifier, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, - TableWithJoins, Top, TopQuantity, ValueTableMode, Values, 
WildcardAdditionalOptions, With, + FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate, + InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn, + JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern, + MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, + OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem, + RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, + SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, + TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, @@ -74,7 +76,7 @@ mod value; #[cfg(feature = "visitor")] mod visitor; -struct DisplaySeparated<'a, T> +pub struct DisplaySeparated<'a, T> where T: fmt::Display, { @@ -97,14 +99,14 @@ where } } -fn display_separated<'a, T>(slice: &'a [T], sep: &'static str) -> DisplaySeparated<'a, T> +pub fn display_separated<'a, T>(slice: &'a [T], sep: &'static str) -> DisplaySeparated<'a, T> where T: fmt::Display, { DisplaySeparated { slice, sep } } -fn display_comma_separated(slice: &[T]) -> DisplaySeparated<'_, T> +pub fn display_comma_separated(slice: &[T]) -> DisplaySeparated<'_, T> where T: fmt::Display, { @@ -272,7 +274,7 @@ impl fmt::Display for Interval { } } -/// A field definition within a struct. 
+/// A field definition within a struct /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -293,6 +295,23 @@ impl fmt::Display for StructField { } } +/// A field definition within a union +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct UnionField { + pub field_name: Ident, + pub field_type: DataType, +} + +impl fmt::Display for UnionField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.field_name, self.field_type) + } +} + /// A dictionary field within a dictionary. /// /// [duckdb]: https://duckdb.org/docs/sql/data_types/struct#creating-structs @@ -310,6 +329,37 @@ impl fmt::Display for DictionaryField { } } +/// Represents a Map expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Map { + pub entries: Vec, +} + +impl Display for Map { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MAP {{{}}}", display_comma_separated(&self.entries)) + } +} + +/// A map field within a map. 
+/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MapEntry { + pub key: Box, + pub value: Box, +} + +impl fmt::Display for MapEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.key, self.value) + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -584,7 +634,7 @@ pub enum Expr { /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { timestamp: Box, - time_zone: String, + time_zone: Box, }, /// Extract a field from a timestamp e.g. `EXTRACT(MONTH FROM foo)` /// @@ -678,7 +728,7 @@ pub enum Expr { }, /// Access a map-like object by field (e.g. `column['field']` or `column[4]` /// Note that depending on the dialect, struct like accesses may be - /// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess) + /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess) /// MapAccess { column: Box, @@ -745,10 +795,18 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs Dictionary(Vec), - /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` - ArrayIndex { - obj: Box, - indexes: Vec, + /// `DuckDB` specific `Map` literal expression [1] + /// + /// Syntax: + /// ```sql + /// syntax: Map {key1: value1[, ... ]} + /// ``` + /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps + Map(Map), + /// An access of nested data using subscript syntax, for example `array[2]`. + Subscript { + expr: Box, + subscript: Box, }, /// An array expression e.g. 
`ARRAY[1, 2]` Array(Array), @@ -804,6 +862,68 @@ pub enum Expr { Lambda(LambdaFunction), } +/// The contents inside the `[` and `]` in a subscript expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Subscript { + /// Accesses the element of the array at the given index. + Index { index: Expr }, + + /// Accesses a slice of an array on PostgreSQL, e.g. + /// + /// ```plaintext + /// => select (array[1,2,3,4,5,6])[2:5]; + /// ----------- + /// {2,3,4,5} + /// ``` + /// + /// The lower and/or upper bound can be omitted to slice from the start or + /// end of the array respectively. + /// + /// See . + /// + /// Also supports an optional "stride" as the last element (this is not + /// supported by postgres), e.g. + /// + /// ```plaintext + /// => select (array[1,2,3,4,5,6])[1:6:2]; + /// ----------- + /// {1,3,5} + /// ``` + Slice { + lower_bound: Option, + upper_bound: Option, + stride: Option, + }, +} + +impl fmt::Display for Subscript { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Subscript::Index { index } => write!(f, "{index}"), + Subscript::Slice { + lower_bound, + upper_bound, + stride, + } => { + if let Some(lower) = lower_bound { + write!(f, "{lower}")?; + } + write!(f, ":")?; + if let Some(upper) = upper_bound { + write!(f, "{upper}")?; + } + if let Some(stride) = stride { + write!(f, ":")?; + write!(f, "{stride}")?; + } + Ok(()) + } + } + } +} + /// A lambda function. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1250,12 +1370,14 @@ impl fmt::Display for Expr { Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) } - Expr::ArrayIndex { obj, indexes } => { - write!(f, "{obj}")?; - for i in indexes { - write!(f, "[{i}]")?; - } - Ok(()) + Expr::Map(map) => { + write!(f, "{map}") + } + Expr::Subscript { + expr, + subscript: key, + } => { + write!(f, "{expr}[{key}]") } Expr::Array(set) => { write!(f, "{set}") @@ -1270,7 +1392,7 @@ impl fmt::Display for Expr { timestamp, time_zone, } => { - write!(f, "{timestamp} AT TIME ZONE '{time_zone}'") + write!(f, "{timestamp} AT TIME ZONE {time_zone}") } Expr::Interval(interval) => { write!(f, "{interval}") @@ -1958,66 +2080,23 @@ pub enum Statement { query: Box, options: CreateTableOptions, cluster_by: Vec, + /// Snowflake: Views can have comments in Snowflake. + /// + comment: Option, /// if true, has RedShift [`WITH NO SCHEMA BINDING`] clause with_no_schema_binding: bool, /// if true, has SQLite `IF NOT EXISTS` clause if_not_exists: bool, /// if true, has SQLite `TEMP` or `TEMPORARY` clause temporary: bool, + /// if not None, has Clickhouse `TO` clause, specify the table into which to insert results + /// + to: Option, }, /// ```sql /// CREATE TABLE /// ``` - CreateTable { - or_replace: bool, - temporary: bool, - external: bool, - global: Option, - if_not_exists: bool, - transient: bool, - /// Table name - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - name: ObjectName, - /// Optional schema - columns: Vec, - constraints: Vec, - hive_distribution: HiveDistributionStyle, - hive_formats: Option, - table_properties: Vec, - with_options: Vec, - file_format: Option, - location: Option, - query: Option>, - without_rowid: bool, - like: Option, - clone: Option, - engine: Option, - comment: Option, - auto_increment_offset: Option, - default_charset: Option, - collation: 
Option, - on_commit: Option, - /// ClickHouse "ON CLUSTER" clause: - /// - on_cluster: Option, - /// ClickHouse "ORDER BY " clause. Note that omitted ORDER BY is different - /// than empty (represented as ()), the latter meaning "no sorting". - /// - order_by: Option>, - /// BigQuery: A partition expression for the table. - /// - partition_by: Option>, - /// BigQuery: Table clustering column list. - /// - cluster_by: Option>, - /// BigQuery: Table options list. - /// - options: Option>, - /// SQLite "STRICT" clause. - /// if the "STRICT" table-option keyword is added to the end, after the closing ")", - /// then strict typing rules apply to that table. - strict: bool, - }, + CreateTable(CreateTable), /// ```sql /// CREATE VIRTUAL TABLE .. USING ()` /// ``` @@ -2032,20 +2111,7 @@ pub enum Statement { /// ```sql /// `CREATE INDEX` /// ``` - CreateIndex { - /// index name - name: Option, - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - table_name: ObjectName, - using: Option, - columns: Vec, - unique: bool, - concurrently: bool, - if_not_exists: bool, - include: Vec, - nulls_distinct: Option, - predicate: Option, - }, + CreateIndex(CreateIndex), /// ```sql /// CREATE ROLE /// ``` @@ -2192,6 +2258,16 @@ pub enum Statement { option: Option, }, /// ```sql + /// DROP PROCEDURE + /// ``` + DropProcedure { + if_exists: bool, + /// One or more procedures to drop + proc_desc: Vec, + /// `CASCADE` or `RESTRICT` + option: Option, + }, + /// ```sql /// DROP SECRET /// ``` DropSecret { @@ -2262,7 +2338,7 @@ pub enum Statement { /// SET [ SESSION | LOCAL ] ROLE role_name /// ``` /// - /// Sets sesssion state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] + /// Sets session state.
Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#set-role-statement /// [2]: https://www.postgresql.org/docs/14/sql-set-role.html @@ -2280,7 +2356,7 @@ pub enum Statement { /// ``` /// /// Note: this is not a standard SQL statement, but it is supported by at - /// least MySQL and PostgreSQL. Not all MySQL-specific syntatic forms are + /// least MySQL and PostgreSQL. Not all MySQL-specific syntactic forms are /// supported yet. SetVariable { local: bool, @@ -2454,14 +2530,64 @@ pub enum Statement { /// Supported variants: /// 1. [Hive](https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction) /// 2. [Postgres](https://www.postgresql.org/docs/15/sql-createfunction.html) + /// 3. [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement) CreateFunction { or_replace: bool, temporary: bool, + if_not_exists: bool, name: ObjectName, args: Option>, return_type: Option, - /// Optional parameters. - params: CreateFunctionBody, + /// The expression that defines the function. + /// + /// Examples: + /// ```sql + /// AS ((SELECT 1)) + /// AS "console.log();" + /// ``` + function_body: Option, + /// Behavior attribute for the function + /// + /// IMMUTABLE | STABLE | VOLATILE + /// + /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + behavior: Option, + /// CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT + /// + /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + called_on_null: Option, + /// PARALLEL { UNSAFE | RESTRICTED | SAFE } + /// + /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + parallel: Option, + /// USING ... (Hive only) + using: Option, + /// Language used in a UDF definition. 
+ /// + /// Example: + /// ```sql + /// CREATE FUNCTION foo() LANGUAGE js AS "console.log();" + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_a_javascript_udf) + language: Option, + /// Determinism keyword used for non-sql UDF definitions. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11) + determinism_specifier: Option, + /// List of options for creating the function. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11) + options: Option>, + /// Connection resource for a remote function. + /// + /// Example: + /// ```sql + /// CREATE FUNCTION foo() + /// RETURNS FLOAT64 + /// REMOTE WITH CONNECTION us.myconnection + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_a_remote_function) + remote_connection: Option, }, /// ```sql /// CREATE PROCEDURE @@ -3152,16 +3278,26 @@ impl fmt::Display for Statement { Statement::CreateFunction { or_replace, temporary, + if_not_exists, name, args, return_type, - params, + function_body, + language, + behavior, + called_on_null, + parallel, + using, + determinism_specifier, + options, + remote_connection, } => { write!( f, - "CREATE {or_replace}{temp}FUNCTION {name}", + "CREATE {or_replace}{temp}FUNCTION {if_not_exists}{name}", temp = if *temporary { "TEMPORARY " } else { "" }, or_replace = if *or_replace { "OR REPLACE " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, )?; if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; @@ -3169,7 +3305,43 @@ impl fmt::Display for Statement { if let Some(return_type) = return_type { write!(f, " RETURNS {return_type}")?; } - write!(f, "{params}")?; + if let Some(determinism_specifier) = determinism_specifier { + write!(f, " {determinism_specifier}")?; + } + 
if let Some(language) = language { + write!(f, " LANGUAGE {language}")?; + } + if let Some(behavior) = behavior { + write!(f, " {behavior}")?; + } + if let Some(called_on_null) = called_on_null { + write!(f, " {called_on_null}")?; + } + if let Some(parallel) = parallel { + write!(f, " {parallel}")?; + } + if let Some(remote_connection) = remote_connection { + write!(f, " REMOTE WITH CONNECTION {remote_connection}")?; + } + if let Some(CreateFunctionBody::AsBeforeOptions(function_body)) = function_body { + write!(f, " AS {function_body}")?; + } + if let Some(CreateFunctionBody::Return(function_body)) = function_body { + write!(f, " RETURN {function_body}")?; + } + if let Some(using) = using { + write!(f, " {using}")?; + } + if let Some(options) = options { + write!( + f, + " OPTIONS({})", + display_comma_separated(options.as_slice()) + )?; + } + if let Some(CreateFunctionBody::AsAfterOptions(function_body)) = function_body { + write!(f, " AS {function_body}")?; + } Ok(()) } Statement::CreateProcedure { @@ -3226,19 +3398,32 @@ impl fmt::Display for Statement { materialized, options, cluster_by, + comment, with_no_schema_binding, if_not_exists, temporary, + to, } => { write!( f, - "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}", + "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}{to}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, materialized = if *materialized { "MATERIALIZED " } else { "" }, name = name, temporary = if *temporary { "TEMPORARY " } else { "" }, - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" } + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + to = to + .as_ref() + .map(|to| format!(" TO {to}")) + .unwrap_or_default() )?; + if let Some(comment) = comment { + write!( + f, + " COMMENT = '{}'", + value::escape_single_quote_string(comment) + )?; + } if matches!(options, CreateTableOptions::With(_)) { write!(f, " {options}")?; } @@ -3257,245 +3442,7 @@ impl 
fmt::Display for Statement { } Ok(()) } - Statement::CreateTable { - name, - columns, - constraints, - table_properties, - with_options, - or_replace, - if_not_exists, - transient, - hive_distribution, - hive_formats, - external, - global, - temporary, - file_format, - location, - query, - without_rowid, - like, - clone, - default_charset, - engine, - comment, - auto_increment_offset, - collation, - on_commit, - on_cluster, - order_by, - partition_by, - cluster_by, - options, - strict, - } => { - // We want to allow the following options - // Empty column list, allowed by PostgreSQL: - // `CREATE TABLE t ()` - // No columns provided for CREATE TABLE AS: - // `CREATE TABLE t AS SELECT a from t2` - // Columns provided for CREATE TABLE AS: - // `CREATE TABLE t (a INT) AS SELECT a from t2` - write!( - f, - "CREATE {or_replace}{external}{global}{temporary}{transient}TABLE {if_not_exists}{name}", - or_replace = if *or_replace { "OR REPLACE " } else { "" }, - external = if *external { "EXTERNAL " } else { "" }, - global = global - .map(|global| { - if global { - "GLOBAL " - } else { - "LOCAL " - } - }) - .unwrap_or(""), - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - temporary = if *temporary { "TEMPORARY " } else { "" }, - transient = if *transient { "TRANSIENT " } else { "" }, - name = name, - )?; - if let Some(on_cluster) = on_cluster { - write!( - f, - " ON CLUSTER {}", - on_cluster.replace('{', "'{").replace('}', "}'") - )?; - } - if !columns.is_empty() || !constraints.is_empty() { - write!(f, " ({}", display_comma_separated(columns))?; - if !columns.is_empty() && !constraints.is_empty() { - write!(f, ", ")?; - } - write!(f, "{})", display_comma_separated(constraints))?; - } else if query.is_none() && like.is_none() && clone.is_none() { - // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens - write!(f, " ()")?; - } - // Only for SQLite - if *without_rowid { - write!(f, " WITHOUT ROWID")?; - } - - // Only for Hive - if let 
Some(l) = like { - write!(f, " LIKE {l}")?; - } - - if let Some(c) = clone { - write!(f, " CLONE {c}")?; - } - - match hive_distribution { - HiveDistributionStyle::PARTITIONED { columns } => { - write!(f, " PARTITIONED BY ({})", display_comma_separated(columns))?; - } - HiveDistributionStyle::CLUSTERED { - columns, - sorted_by, - num_buckets, - } => { - write!(f, " CLUSTERED BY ({})", display_comma_separated(columns))?; - if !sorted_by.is_empty() { - write!(f, " SORTED BY ({})", display_comma_separated(sorted_by))?; - } - if *num_buckets > 0 { - write!(f, " INTO {num_buckets} BUCKETS")?; - } - } - HiveDistributionStyle::SKEWED { - columns, - on, - stored_as_directories, - } => { - write!( - f, - " SKEWED BY ({})) ON ({})", - display_comma_separated(columns), - display_comma_separated(on) - )?; - if *stored_as_directories { - write!(f, " STORED AS DIRECTORIES")?; - } - } - _ => (), - } - - if let Some(HiveFormat { - row_format, - serde_properties, - storage, - location, - }) = hive_formats - { - match row_format { - Some(HiveRowFormat::SERDE { class }) => { - write!(f, " ROW FORMAT SERDE '{class}'")? - } - Some(HiveRowFormat::DELIMITED { delimiters }) => { - write!(f, " ROW FORMAT DELIMITED")?; - if !delimiters.is_empty() { - write!(f, " {}", display_separated(delimiters, " "))?; - } - } - None => (), - } - match storage { - Some(HiveIOFormat::IOF { - input_format, - output_format, - }) => write!( - f, - " STORED AS INPUTFORMAT {input_format} OUTPUTFORMAT {output_format}" - )?, - Some(HiveIOFormat::FileFormat { format }) if !*external => { - write!(f, " STORED AS {format}")? 
- } - _ => (), - } - if let Some(serde_properties) = serde_properties.as_ref() { - write!( - f, - " WITH SERDEPROPERTIES ({})", - display_comma_separated(serde_properties) - )?; - } - if !*external { - if let Some(loc) = location { - write!(f, " LOCATION '{loc}'")?; - } - } - } - if *external { - if let Some(file_format) = &file_format { - write!(f, " STORED AS {file_format}")?; - } - write!(f, " LOCATION '{}'", location.as_ref().unwrap())?; - } - if !table_properties.is_empty() { - write!( - f, - " TBLPROPERTIES ({})", - display_comma_separated(table_properties) - )?; - } - if !with_options.is_empty() { - write!(f, " WITH ({})", display_comma_separated(with_options))?; - } - if let Some(engine) = engine { - write!(f, " ENGINE={engine}")?; - } - if let Some(comment) = comment { - write!(f, " COMMENT '{comment}'")?; - } - if let Some(auto_increment_offset) = auto_increment_offset { - write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; - } - if let Some(order_by) = order_by { - write!(f, " ORDER BY ({})", display_comma_separated(order_by))?; - } - if let Some(partition_by) = partition_by.as_ref() { - write!(f, " PARTITION BY {partition_by}")?; - } - if let Some(cluster_by) = cluster_by.as_ref() { - write!( - f, - " CLUSTER BY {}", - display_comma_separated(cluster_by.as_slice()) - )?; - } - if let Some(options) = options.as_ref() { - write!( - f, - " OPTIONS({})", - display_comma_separated(options.as_slice()) - )?; - } - if let Some(query) = query { - write!(f, " AS {query}")?; - } - if let Some(default_charset) = default_charset { - write!(f, " DEFAULT CHARSET={default_charset}")?; - } - if let Some(collation) = collation { - write!(f, " COLLATE={collation}")?; - } - - if on_commit.is_some() { - let on_commit = match on_commit { - Some(OnCommit::DeleteRows) => "ON COMMIT DELETE ROWS", - Some(OnCommit::PreserveRows) => "ON COMMIT PRESERVE ROWS", - Some(OnCommit::Drop) => "ON COMMIT DROP", - None => "", - }; - write!(f, " {on_commit}")?; - } - if *strict { - 
write!(f, " STRICT")?; - } - Ok(()) - } + Statement::CreateTable(create_table) => create_table.fmt(f), Statement::CreateVirtualTable { name, if_not_exists, @@ -3514,48 +3461,7 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::CreateIndex { - name, - table_name, - using, - columns, - unique, - concurrently, - if_not_exists, - include, - nulls_distinct, - predicate, - } => { - write!( - f, - "CREATE {unique}INDEX {concurrently}{if_not_exists}", - unique = if *unique { "UNIQUE " } else { "" }, - concurrently = if *concurrently { "CONCURRENTLY " } else { "" }, - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - )?; - if let Some(value) = name { - write!(f, "{value} ")?; - } - write!(f, "ON {table_name}")?; - if let Some(value) = using { - write!(f, " USING {value} ")?; - } - write!(f, "({})", display_separated(columns, ","))?; - if !include.is_empty() { - write!(f, " INCLUDE ({})", display_separated(include, ","))?; - } - if let Some(value) = nulls_distinct { - if *value { - write!(f, " NULLS DISTINCT")?; - } else { - write!(f, " NULLS NOT DISTINCT")?; - } - } - if let Some(predicate) = predicate { - write!(f, " WHERE {predicate}")?; - } - Ok(()) - } + Statement::CreateIndex(create_index) => create_index.fmt(f), Statement::CreateExtension { name, if_not_exists, @@ -3791,6 +3697,22 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::DropProcedure { + if_exists, + proc_desc, + option, + } => { + write!( + f, + "DROP PROCEDURE{} {}", + if *if_exists { " IF EXISTS" } else { "" }, + display_comma_separated(proc_desc), + )?; + if let Some(op) = option { + write!(f, " {op}")?; + } + Ok(()) + } Statement::DropSecret { if_exists, temporary, @@ -4725,13 +4647,35 @@ impl fmt::Display for GrantObjects { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Assignment { - pub id: Vec, + pub target: AssignmentTarget, pub value: Expr, } impl fmt::Display for 
Assignment { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} = {}", display_separated(&self.id, "."), self.value) + write!(f, "{} = {}", self.target, self.value) + } +} + +/// Left-hand side of an assignment in an UPDATE statement, +/// e.g. `foo` in `foo = 5` (ColumnName assignment) or +/// `(a, b)` in `(a, b) = (1, 2)` (Tuple assignment). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AssignmentTarget { + /// A single column + ColumnName(ObjectName), + /// A tuple of columns + Tuple(Vec), +} + +impl fmt::Display for AssignmentTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AssignmentTarget::ColumnName(column) => write!(f, "{}", column), + AssignmentTarget::Tuple(columns) => write!(f, "({})", display_comma_separated(columns)), + } } } @@ -4837,6 +4781,16 @@ impl fmt::Display for CloseCursor { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Function { pub name: ObjectName, + /// The parameters to the function, including any options specified within the + /// delimiting parentheses. + /// + /// Example: + /// ```plaintext + /// HISTOGRAM(0.5, 0.6)(x, y) + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/parametric-functions) + pub parameters: FunctionArguments, /// The arguments to the function, including any options specified within the /// delimiting parentheses. 
pub args: FunctionArguments, @@ -4865,7 +4819,7 @@ pub struct Function { impl fmt::Display for Function { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}{}", self.name, self.args)?; + write!(f, "{}{}{}", self.name, self.parameters, self.args)?; if !self.within_group.is_empty() { write!( @@ -4922,7 +4876,7 @@ impl fmt::Display for FunctionArguments { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FunctionArgumentList { - /// `[ ALL | DISTINCT ] + /// `[ ALL | DISTINCT ]` pub duplicate_treatment: Option, /// The function arguments. pub args: Vec, @@ -6143,75 +6097,74 @@ impl fmt::Display for FunctionParallel { } } +/// [BigQuery] Determinism specifier used in a UDF definition. +/// +/// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum FunctionDefinition { - SingleQuotedDef(String), - DoubleDollarDef(String), +pub enum FunctionDeterminismSpecifier { + Deterministic, + NotDeterministic, } -impl fmt::Display for FunctionDefinition { +impl fmt::Display for FunctionDeterminismSpecifier { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - FunctionDefinition::SingleQuotedDef(s) => write!(f, "'{s}'")?, - FunctionDefinition::DoubleDollarDef(s) => write!(f, "$${s}$$")?, + FunctionDeterminismSpecifier::Deterministic => { + write!(f, "DETERMINISTIC") + } + FunctionDeterminismSpecifier::NotDeterministic => { + write!(f, "NOT DETERMINISTIC") + } } - Ok(()) } } -/// Postgres specific feature. +/// Represent the expression body of a `CREATE FUNCTION` statement as well as +/// where within the statement, the body shows up. 
/// -/// See [Postgres docs](https://www.postgresql.org/docs/15/sql-createfunction.html) -/// for more details -#[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +/// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 +/// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct CreateFunctionBody { - /// LANGUAGE lang_name - pub language: Option, - /// IMMUTABLE | STABLE | VOLATILE - pub behavior: Option, - /// CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT - pub called_on_null: Option, - /// PARALLEL { UNSAFE | RESTRICTED | SAFE } - pub parallel: Option, - /// AS 'definition' +pub enum CreateFunctionBody { + /// A function body expression using the 'AS' keyword and shows up + /// before any `OPTIONS` clause. /// - /// Note that Hive's `AS class_name` is also parsed here. - pub as_: Option, - /// RETURN expression - pub return_: Option, - /// USING ... 
(Hive only) - pub using: Option, -} - -impl fmt::Display for CreateFunctionBody { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if let Some(language) = &self.language { - write!(f, " LANGUAGE {language}")?; - } - if let Some(behavior) = &self.behavior { - write!(f, " {behavior}")?; - } - if let Some(called_on_null) = &self.called_on_null { - write!(f, " {called_on_null}")?; - } - if let Some(parallel) = &self.parallel { - write!(f, " {parallel}")?; - } - if let Some(definition) = &self.as_ { - write!(f, " AS {definition}")?; - } - if let Some(expr) = &self.return_ { - write!(f, " RETURN {expr}")?; - } - if let Some(using) = &self.using { - write!(f, " {using}")?; - } - Ok(()) - } + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(x FLOAT64, y FLOAT64) RETURNS FLOAT64 + /// AS (x * y) + /// OPTIONS(description="desc"); + /// ``` + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 + AsBeforeOptions(Expr), + /// A function body expression using the 'AS' keyword and shows up + /// after any `OPTIONS` clause. + /// + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(x FLOAT64, y FLOAT64) RETURNS FLOAT64 + /// OPTIONS(description="desc") + /// AS (x * y); + /// ``` + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 + AsAfterOptions(Expr), + /// Function body expression using the 'RETURN' keyword. + /// + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER + /// LANGUAGE SQL + /// RETURN a + b; + /// ``` + /// + /// [Postgres]: https://www.postgresql.org/docs/current/sql-createfunction.html + Return(Expr), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -6447,6 +6400,140 @@ impl Display for MySQLColumnPosition { } } +/// Engine of DB. Some warehouse has parameters of engine, e.g. 
[clickhouse] +/// +/// [clickhouse]: https://clickhouse.com/docs/en/engines/table-engines +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableEngine { + pub name: String, + pub parameters: Option>, +} + +impl Display for TableEngine { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.name)?; + + if let Some(parameters) = self.parameters.as_ref() { + write!(f, "({})", display_comma_separated(parameters))?; + } + + Ok(()) + } +} + +/// Snowflake `WITH ROW ACCESS POLICY policy_name ON (identifier, ...)` +/// +/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct RowAccessPolicy { + pub policy: ObjectName, + pub on: Vec, +} + +impl RowAccessPolicy { + pub fn new(policy: ObjectName, on: Vec) -> Self { + Self { policy, on } + } +} + +impl Display for RowAccessPolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "WITH ROW ACCESS POLICY {} ON ({})", + self.policy, + display_comma_separated(self.on.as_slice()) + ) + } +} + +/// Snowflake `WITH TAG ( tag_name = '', ...)` +/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Tag { + pub key: Ident, + pub value: String, +} + +impl Tag { + pub fn new(key: Ident, value: String) -> Self { + Self { key, value } + } +} + +impl Display for Tag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}='{}'", self.key, self.value) + } +} + +/// Helper to indicate if a comment includes the `=` in the display form +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", 
derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CommentDef { + /// Includes `=` when printing the comment, as `COMMENT = 'comment'` + /// Does not include `=` when printing the comment, as `COMMENT 'comment'` + WithEq(String), + WithoutEq(String), +} + +impl Display for CommentDef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CommentDef::WithEq(comment) | CommentDef::WithoutEq(comment) => write!(f, "{comment}"), + } + } +} + +/// Helper to indicate if a collection should be wrapped by a symbol in the display form +/// +/// [`Display`] is implemented for every [`Vec`] where `T: Display`. +/// The string output is a comma separated list for the vec items +/// +/// # Examples +/// ``` +/// # use sqlparser::ast::WrappedCollection; +/// let items = WrappedCollection::Parentheses(vec!["one", "two", "three"]); +/// assert_eq!("(one, two, three)", items.to_string()); +/// +/// let items = WrappedCollection::NoWrapping(vec!["one", "two", "three"]); +/// assert_eq!("one, two, three", items.to_string()); +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum WrappedCollection { + /// Print the collection without wrapping symbols, as `item, item, item` + NoWrapping(T), + /// Wraps the collection in Parentheses, as `(item, item, item)` + Parentheses(T), +} + +impl Display for WrappedCollection> +where + T: Display, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + WrappedCollection::NoWrapping(inner) => { + write!(f, "{}", display_comma_separated(inner.as_slice())) + } + WrappedCollection::Parentheses(inner) => { + write!(f, "({})", display_comma_separated(inner.as_slice())) + } + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 3c4f192e3..e70df344a 
100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -111,7 +111,7 @@ pub enum BinaryOperator { DuckIntegerDivide, /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, - /// Support for custom operators (built by parsers outside this crate) + /// Support for custom operators (such as Postgres custom operators) Custom(String), /// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific) PGBitwiseXor, diff --git a/src/ast/query.rs b/src/ast/query.rs index 08a0bc5af..83d14ecb9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -33,7 +33,7 @@ pub struct Query { /// SELECT or UNION / EXCEPT / INTERSECT pub body: Box, /// ORDER BY - pub order_by: Vec, + pub order_by: Option, /// `LIMIT { | ALL }` pub limit: Option, @@ -50,6 +50,15 @@ pub struct Query { /// `FOR JSON { AUTO | PATH } [ , INCLUDE_NULL_VALUES ]` /// (MSSQL-specific) pub for_clause: Option, + /// ClickHouse syntax: `SELECT * FROM t SETTINGS key1 = value1, key2 = value2` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query) + pub settings: Option>, + /// `SELECT * FROM t FORMAT JSONCompact` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format) + /// (ClickHouse-specific) + pub format_clause: Option, } impl fmt::Display for Query { @@ -58,8 +67,17 @@ impl fmt::Display for Query { write!(f, "{with} ")?; } write!(f, "{}", self.body)?; - if !self.order_by.is_empty() { - write!(f, " ORDER BY {}", display_comma_separated(&self.order_by))?; + if let Some(ref order_by) = self.order_by { + write!(f, " ORDER BY")?; + if !order_by.exprs.is_empty() { + write!(f, " {}", display_comma_separated(&order_by.exprs))?; + } + if let Some(ref interpolate) = order_by.interpolate { + match &interpolate.exprs { + Some(exprs) => write!(f, " INTERPOLATE ({})", display_comma_separated(exprs))?, + None => write!(f, " INTERPOLATE")?, + } + } } if let Some(ref limit) = 
self.limit { write!(f, " LIMIT {limit}")?; @@ -70,6 +88,9 @@ impl fmt::Display for Query { if !self.limit_by.is_empty() { write!(f, " BY {}", display_separated(&self.limit_by, ", "))?; } + if let Some(ref settings) = self.settings { + write!(f, " SETTINGS {}", display_comma_separated(settings))?; + } if let Some(ref fetch) = self.fetch { write!(f, " {fetch}")?; } @@ -79,6 +100,9 @@ impl fmt::Display for Query { if let Some(ref for_clause) = self.for_clause { write!(f, " {}", for_clause)?; } + if let Some(ref format) = self.format_clause { + write!(f, " {}", format)?; + } Ok(()) } } @@ -108,6 +132,17 @@ pub enum SetExpr { Table(Box), } +impl SetExpr { + /// If this `SetExpr` is a `SELECT`, returns the [`Select`]. + pub fn as_select(&self) -> Option<&Select> { + if let Self::Select(select) = self { + Some(&**select) + } else { + None + } + } +} + impl fmt::Display for SetExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -229,6 +264,11 @@ pub struct Select { pub from: Vec, /// LATERAL VIEWs pub lateral_views: Vec, + /// ClickHouse syntax: `PREWHERE a = 1 WHERE b = 2`, + /// and it can be used together with WHERE selection. + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/prewhere) + pub prewhere: Option, /// WHERE pub selection: Option, /// GROUP BY @@ -284,14 +324,17 @@ impl fmt::Display for Select { write!(f, "{lv}")?; } } + if let Some(ref prewhere) = self.prewhere { + write!(f, " PREWHERE {prewhere}")?; + } if let Some(ref selection) = self.selection { write!(f, " WHERE {selection}")?; } match &self.group_by { - GroupByExpr::All => write!(f, " GROUP BY ALL")?, - GroupByExpr::Expressions(exprs) => { + GroupByExpr::All(_) => write!(f, " {}", self.group_by)?, + GroupByExpr::Expressions(exprs, _) => { if !exprs.is_empty() { - write!(f, " GROUP BY {}", display_comma_separated(exprs))?; + write!(f, " {}", self.group_by)? 
} } } @@ -536,19 +579,20 @@ impl fmt::Display for IdentWithAlias { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WildcardAdditionalOptions { /// `[ILIKE...]`. - /// Snowflake syntax: + /// Snowflake syntax: pub opt_ilike: Option, /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. /// Clickhouse syntax: pub opt_except: Option, - /// `[RENAME ...]`. - pub opt_rename: Option, /// `[REPLACE]` /// BigQuery syntax: /// Clickhouse syntax: + /// Snowflake syntax: pub opt_replace: Option, + /// `[RENAME ...]`. + pub opt_rename: Option, } impl fmt::Display for WildcardAdditionalOptions { @@ -562,12 +606,12 @@ impl fmt::Display for WildcardAdditionalOptions { if let Some(except) = &self.opt_except { write!(f, " {except}")?; } - if let Some(rename) = &self.opt_rename { - write!(f, " {rename}")?; - } if let Some(replace) = &self.opt_replace { write!(f, " {replace}")?; } + if let Some(rename) = &self.opt_rename { + write!(f, " {rename}")?; + } Ok(()) } } @@ -816,6 +860,20 @@ impl fmt::Display for ConnectBy { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Setting { + pub key: Ident, + pub value: Value, +} + +impl fmt::Display for Setting { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} = {}", self.key, self.value) + } +} + /// An expression optionally followed by an alias. 
/// /// Example: @@ -841,6 +899,14 @@ impl fmt::Display for ExprWithAlias { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableFunctionArgs { + pub args: Vec, + pub settings: Option>, +} + /// A table name or a parenthesized subquery with an optional alias #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -858,12 +924,16 @@ pub enum TableFactor { /// This field's value is `Some(v)`, where `v` is a (possibly empty) /// vector of arguments, in the case of a table-valued function call, /// whereas it's `None` in the case of a regular table name. - args: Option>, + args: Option, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. with_hints: Vec, /// Optional version qualifier to facilitate table time-travel, as /// supported by BigQuery and MSSQL. version: Option, + // Optional table function modifier to generate the ordinality for column. + /// For example, `SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t(a, b);` + /// [WITH ORDINALITY](https://www.postgresql.org/docs/current/functions-srf.html), supported by Postgres. + with_ordinality: bool, /// [Partition selection](https://dev.mysql.com/doc/refman/8.0/en/partitioning-selection.html), supported by MySQL. partitions: Vec, }, @@ -899,6 +969,7 @@ pub enum TableFactor { array_exprs: Vec, with_offset: bool, with_offset_alias: Option, + with_ordinality: bool, }, /// The `JSON_TABLE` table-valued function. /// Part of the SQL standard, but implemented only by MySQL, Oracle, and DB2. @@ -946,7 +1017,8 @@ pub enum TableFactor { table: Box, aggregate_functions: Vec, // Function expression value_column: Vec, - pivot_values: Vec, + value_source: PivotValueSource, + default_on_null: Option, alias: Option, }, /// An UNPIVOT operation on a table. 
@@ -987,6 +1059,41 @@ pub enum TableFactor { }, } +/// The source of values in a `PIVOT` operation. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum PivotValueSource { + /// Pivot on a static list of values. + /// + /// See . + List(Vec), + /// Pivot on all distinct values of the pivot column. + /// + /// See . + Any(Vec), + /// Pivot on all values returned by a subquery. + /// + /// See . + Subquery(Query), +} + +impl fmt::Display for PivotValueSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PivotValueSource::List(values) => write!(f, "{}", display_comma_separated(values)), + PivotValueSource::Any(order_by) => { + write!(f, "ANY")?; + if !order_by.is_empty() { + write!(f, " ORDER BY {}", display_comma_separated(order_by))?; + } + Ok(()) + } + PivotValueSource::Subquery(query) => write!(f, "{query}"), + } + } +} + /// An item in the `MEASURES` subclause of a `MATCH_RECOGNIZE` operation. /// /// See . 
@@ -1208,13 +1315,22 @@ impl fmt::Display for TableFactor { with_hints, version, partitions, + with_ordinality, } => { write!(f, "{name}")?; if !partitions.is_empty() { write!(f, "PARTITION ({})", display_comma_separated(partitions))?; } if let Some(args) = args { - write!(f, "({})", display_comma_separated(args))?; + write!(f, "(")?; + write!(f, "{}", display_comma_separated(&args.args))?; + if let Some(ref settings) = args.settings { + write!(f, ", SETTINGS {}", display_comma_separated(&settings))?; + } + write!(f, ")")?; + } + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; } if let Some(alias) = alias { write!(f, " AS {alias}")?; @@ -1269,9 +1385,14 @@ impl fmt::Display for TableFactor { array_exprs, with_offset, with_offset_alias, + with_ordinality, } => { write!(f, "UNNEST({})", display_comma_separated(array_exprs))?; + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; + } + if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1313,17 +1434,20 @@ impl fmt::Display for TableFactor { table, aggregate_functions, value_column, - pivot_values, + value_source, + default_on_null, alias, } => { write!( f, - "{} PIVOT({} FOR {} IN ({}))", - table, + "{table} PIVOT({} FOR {} IN ({value_source})", display_comma_separated(aggregate_functions), Expr::CompoundIdentifier(value_column.to_vec()), - display_comma_separated(pivot_values) )?; + if let Some(expr) = default_on_null { + write!(f, " DEFAULT ON NULL ({expr})")?; + } + write!(f, ")")?; if alias.is_some() { write!(f, " AS {}", alias.as_ref().unwrap())?; } @@ -1512,6 +1636,15 @@ impl fmt::Display for Join { ), JoinOperator::CrossApply => write!(f, " CROSS APPLY {}", self.relation), JoinOperator::OuterApply => write!(f, " OUTER APPLY {}", self.relation), + JoinOperator::AsOf { + match_condition, + constraint, + } => write!( + f, + " ASOF JOIN {} MATCH_CONDITION ({match_condition}){}", + self.relation, + suffix(constraint) + ), } } } @@ -1537,6 +1670,14 @@ pub enum JoinOperator { CrossApply, /// 
OUTER APPLY (non-standard) OuterApply, + /// `ASOF` joins are used for joining tables containing time-series data + /// whose timestamp columns do not match exactly. + /// + /// See . + AsOf { + match_condition: Expr, + constraint: JoinConstraint, + }, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -1549,6 +1690,18 @@ pub enum JoinConstraint { None, } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OrderBy { + pub exprs: Vec, + /// Optional: `INTERPOLATE` + /// Supported by [ClickHouse syntax] + /// + /// [ClickHouse syntax]: + pub interpolate: Option, +} + /// An `ORDER BY` expression #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1559,6 +1712,9 @@ pub struct OrderByExpr { pub asc: Option, /// Optional `NULLS FIRST` or `NULLS LAST` pub nulls_first: Option, + /// Optional: `WITH FILL` + /// Supported by [ClickHouse syntax]: + pub with_fill: Option, } impl fmt::Display for OrderByExpr { @@ -1574,6 +1730,67 @@ impl fmt::Display for OrderByExpr { Some(false) => write!(f, " NULLS LAST")?, None => (), } + if let Some(ref with_fill) = self.with_fill { + write!(f, " {}", with_fill)? + } + Ok(()) + } +} + +/// ClickHouse `WITH FILL` modifier for `ORDER BY` clause. 
+/// Supported by [ClickHouse syntax] +/// +/// [ClickHouse syntax]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct WithFill { + pub from: Option, + pub to: Option, + pub step: Option, +} + +impl fmt::Display for WithFill { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "WITH FILL")?; + if let Some(ref from) = self.from { + write!(f, " FROM {}", from)?; + } + if let Some(ref to) = self.to { + write!(f, " TO {}", to)?; + } + if let Some(ref step) = self.step { + write!(f, " STEP {}", step)?; + } + Ok(()) + } +} + +/// ClickHouse `INTERPOLATE` clause for use in `ORDER BY` clause when using `WITH FILL` modifier. +/// Supported by [ClickHouse syntax] +/// +/// [ClickHouse syntax]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct InterpolateExpr { + pub column: Ident, + pub expr: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Interpolate { + pub exprs: Option>, +} + +impl fmt::Display for InterpolateExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.column)?; + if let Some(ref expr) = self.expr { + write!(f, " AS {}", expr)?; + } Ok(()) } } @@ -1798,32 +2015,90 @@ impl fmt::Display for SelectInto { } } +/// ClickHouse supports GROUP BY WITH modifiers(includes ROLLUP|CUBE|TOTALS). +/// e.g. 
GROUP BY year WITH ROLLUP WITH TOTALS +/// +/// [ClickHouse]: +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum GroupByWithModifier { + Rollup, + Cube, + Totals, +} + +impl fmt::Display for GroupByWithModifier { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"), + GroupByWithModifier::Cube => write!(f, "WITH CUBE"), + GroupByWithModifier::Totals => write!(f, "WITH TOTALS"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GroupByExpr { - /// ALL syntax of [Snowflake], and [DuckDB] + /// ALL syntax of [Snowflake], [DuckDB] and [ClickHouse]. /// /// [Snowflake]: /// [DuckDB]: - All, + /// [ClickHouse]: + /// + /// ClickHouse also supports WITH modifiers after GROUP BY ALL and expressions. + /// + /// [ClickHouse]: + All(Vec), /// Expressions - Expressions(Vec), + Expressions(Vec, Vec), } impl fmt::Display for GroupByExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - GroupByExpr::All => write!(f, "GROUP BY ALL"), - GroupByExpr::Expressions(col_names) => { + GroupByExpr::All(modifiers) => { + write!(f, "GROUP BY ALL")?; + if !modifiers.is_empty() { + write!(f, " {}", display_separated(modifiers, " "))?; + } + Ok(()) + } + GroupByExpr::Expressions(col_names, modifiers) => { let col_names = display_comma_separated(col_names); - write!(f, "GROUP BY ({col_names})") + write!(f, "GROUP BY {col_names}")?; + if !modifiers.is_empty() { + write!(f, " {}", display_separated(modifiers, " "))?; + } + Ok(()) } } } } +/// FORMAT identifier or FORMAT NULL clause, specific to ClickHouse. 
+/// +/// [ClickHouse]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FormatClause { + Identifier(Ident), + Null, +} + +impl fmt::Display for FormatClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), + FormatClause::Null => write!(f, "FORMAT NULL"), + } + } +} + /// FOR XML or FOR JSON clause, specific to MSSQL /// (formats the output of a query as XML or JSON) #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 57dcca2e5..1b8a43802 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -533,6 +533,7 @@ where /// null_treatment: None, /// filter: None, /// over: None, +/// parameters: FunctionArguments::None, /// within_group: vec![], /// }); /// } diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index d65de3a47..d3673337f 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -22,6 +22,10 @@ impl Dialect for BigQueryDialect { ch == '`' } + fn supports_projection_trailing_commas(&self) -> bool { + true + } + fn is_identifier_start(&self, ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index e141f941f..1fc211685 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -18,6 +18,10 @@ pub struct DuckDbDialect; // In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. impl Dialect for DuckDbDialect { + fn supports_trailing_commas(&self) -> bool { + true + } + fn is_identifier_start(&self, ch: char) -> bool { ch.is_alphabetic() || ch == '_' } @@ -44,4 +48,11 @@ impl Dialect for DuckDbDialect { fn supports_dictionary_syntax(&self) -> bool { true } + + // DuckDB uses this syntax for `MAP`s. 
+ // + // https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 33391d479..8d762d780 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -70,4 +70,8 @@ impl Dialect for GenericDialect { fn supports_select_wildcard_except(&self) -> bool { true } + + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index da5c8c5ac..3ff7bb2a5 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -122,6 +122,12 @@ pub trait Dialect: Debug + Any { fn is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid unquoted identifier character fn is_identifier_part(&self, ch: char) -> bool; + + /// Most dialects do not have custom operators. Override this method to provide custom operators. + fn is_custom_operator_part(&self, _ch: char) -> bool { + false + } + /// Determine if the dialect supports escaping characters via '\' in string literals. /// /// Some dialects like BigQuery and Snowflake support this while others like @@ -209,6 +215,11 @@ pub trait Dialect: Debug + Any { fn supports_dictionary_syntax(&self) -> bool { false } + /// Returns true if the dialect supports defining object using the + /// syntax like `Map {1: 10, 2: 20}`. + fn support_map_literal_syntax(&self) -> bool { + false + } /// Returns true if the dialect supports lambda functions, for example: /// /// ```sql @@ -251,6 +262,14 @@ pub trait Dialect: Debug + Any { // return None to fall back to the default behavior None } + /// Does the dialect support trailing commas around the query? + fn supports_trailing_commas(&self) -> bool { + false + } + /// Does the dialect support trailing commas in the projection list? 
+ fn supports_projection_trailing_commas(&self) -> bool { + self.supports_trailing_commas() + } /// Dialect-specific infix parser override fn parse_infix( &self, diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index f179111e0..0e04bfa27 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -25,6 +25,10 @@ impl Dialect for PostgreSqlDialect { Some('"') } + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' // Postgres does not support backticks to quote identifiers + } + fn is_identifier_start(&self, ch: char) -> bool { // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS // We don't yet support identifiers beginning with "letters with @@ -36,6 +40,29 @@ impl Dialect for PostgreSqlDialect { ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' } + /// See + fn is_custom_operator_part(&self, ch: char) -> bool { + matches!( + ch, + '+' | '-' + | '*' + | '/' + | '<' + | '>' + | '=' + | '~' + | '!' + | '@' + | '#' + | '%' + | '^' + | '&' + | '|' + | '`' + | '?' 
+ ) + } + fn parse_statement(&self, parser: &mut Parser) -> Option> { if parser.parse_keyword(Keyword::COMMENT) { Some(parse_comment(parser)) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 19267d7c5..9f1d7f27b 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -12,11 +12,14 @@ #[cfg(not(feature = "std"))] use crate::alloc::string::ToString; +use crate::ast::helpers::stmt_create_table::CreateTableBuilder; use crate::ast::helpers::stmt_data_loading::{ DataLoadingOption, DataLoadingOptionType, DataLoadingOptions, StageLoadSelectItem, StageParamsObject, }; -use crate::ast::{Ident, ObjectName, Statement}; +use crate::ast::{ + CommentDef, Ident, ObjectName, RowAccessPolicy, Statement, Tag, WrappedCollection, +}; use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; @@ -38,6 +41,10 @@ impl Dialect for SnowflakeDialect { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } + fn supports_projection_trailing_commas(&self) -> bool { + true + } + fn is_identifier_part(&self, ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() @@ -71,6 +78,12 @@ impl Dialect for SnowflakeDialect { true } + // Snowflake doesn't document this but `FIRST_VALUE(arg, { IGNORE | RESPECT } NULLS)` + // works (i.e. inside the argument list instead of after). 
+ fn supports_window_function_null_treatment_arg(&self) -> bool { + true + } + /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/set#syntax) fn supports_parenthesized_set_variables(&self) -> bool { true @@ -81,12 +94,36 @@ impl Dialect for SnowflakeDialect { // possibly CREATE STAGE //[ OR REPLACE ] let or_replace = parser.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); - //[ TEMPORARY ] - let temporary = parser.parse_keyword(Keyword::TEMPORARY); + // LOCAL | GLOBAL + let global = match parser.parse_one_of_keywords(&[Keyword::LOCAL, Keyword::GLOBAL]) { + Some(Keyword::LOCAL) => Some(false), + Some(Keyword::GLOBAL) => Some(true), + _ => None, + }; + + let mut temporary = false; + let mut volatile = false; + let mut transient = false; + + match parser.parse_one_of_keywords(&[ + Keyword::TEMP, + Keyword::TEMPORARY, + Keyword::VOLATILE, + Keyword::TRANSIENT, + ]) { + Some(Keyword::TEMP | Keyword::TEMPORARY) => temporary = true, + Some(Keyword::VOLATILE) => volatile = true, + Some(Keyword::TRANSIENT) => transient = true, + _ => {} + } if parser.parse_keyword(Keyword::STAGE) { // OK - this is CREATE STAGE statement return Some(parse_create_stage(or_replace, temporary, parser)); + } else if parser.parse_keyword(Keyword::TABLE) { + return Some(parse_create_table( + or_replace, global, temporary, volatile, transient, parser, + )); } else { // need to go back with the cursor let mut back = 1; @@ -110,6 +147,196 @@ impl Dialect for SnowflakeDialect { } } +/// Parse snowflake create table statement. 
+/// +pub fn parse_create_table( + or_replace: bool, + global: Option, + temporary: bool, + volatile: bool, + transient: bool, + parser: &mut Parser, +) -> Result { + let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = parser.parse_object_name(false)?; + + let mut builder = CreateTableBuilder::new(table_name) + .or_replace(or_replace) + .if_not_exists(if_not_exists) + .temporary(temporary) + .transient(transient) + .volatile(volatile) + .global(global) + .hive_formats(Some(Default::default())); + + // Snowflake does not enforce order of the parameters in the statement. The parser needs to + // parse the statement in a loop. + // + // "CREATE TABLE x COPY GRANTS (c INT)" and "CREATE TABLE x (c INT) COPY GRANTS" are both + // accepted by Snowflake + + loop { + let next_token = parser.next_token(); + match &next_token.token { + Token::Word(word) => match word.keyword { + Keyword::COPY => { + parser.expect_keyword(Keyword::GRANTS)?; + builder = builder.copy_grants(true); + } + Keyword::COMMENT => { + parser.expect_token(&Token::Eq)?; + let next_token = parser.next_token(); + let comment = match next_token.token { + Token::SingleQuotedString(str) => Some(CommentDef::WithEq(str)), + _ => parser.expected("comment", next_token)?, + }; + builder = builder.comment(comment); + } + Keyword::AS => { + let query = parser.parse_boxed_query()?; + builder = builder.query(Some(query)); + break; + } + Keyword::CLONE => { + let clone = parser.parse_object_name(false).ok(); + builder = builder.clone_clause(clone); + break; + } + Keyword::LIKE => { + let like = parser.parse_object_name(false).ok(); + builder = builder.like(like); + break; + } + Keyword::CLUSTER => { + parser.expect_keyword(Keyword::BY)?; + parser.expect_token(&Token::LParen)?; + let cluster_by = Some(WrappedCollection::Parentheses( + parser.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + parser.expect_token(&Token::RParen)?; + + builder = 
builder.cluster_by(cluster_by) + } + Keyword::ENABLE_SCHEMA_EVOLUTION => { + parser.expect_token(&Token::Eq)?; + let enable_schema_evolution = + match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { + Some(Keyword::TRUE) => true, + Some(Keyword::FALSE) => false, + _ => { + return parser.expected("TRUE or FALSE", next_token); + } + }; + + builder = builder.enable_schema_evolution(Some(enable_schema_evolution)); + } + Keyword::CHANGE_TRACKING => { + parser.expect_token(&Token::Eq)?; + let change_tracking = + match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { + Some(Keyword::TRUE) => true, + Some(Keyword::FALSE) => false, + _ => { + return parser.expected("TRUE or FALSE", next_token); + } + }; + + builder = builder.change_tracking(Some(change_tracking)); + } + Keyword::DATA_RETENTION_TIME_IN_DAYS => { + parser.expect_token(&Token::Eq)?; + let data_retention_time_in_days = parser.parse_literal_uint()?; + builder = + builder.data_retention_time_in_days(Some(data_retention_time_in_days)); + } + Keyword::MAX_DATA_EXTENSION_TIME_IN_DAYS => { + parser.expect_token(&Token::Eq)?; + let max_data_extension_time_in_days = parser.parse_literal_uint()?; + builder = builder + .max_data_extension_time_in_days(Some(max_data_extension_time_in_days)); + } + Keyword::DEFAULT_DDL_COLLATION => { + parser.expect_token(&Token::Eq)?; + let default_ddl_collation = parser.parse_literal_string()?; + builder = builder.default_ddl_collation(Some(default_ddl_collation)); + } + // WITH is optional, we just verify that next token is one of the expected ones and + // fallback to the default match statement + Keyword::WITH => { + parser.expect_one_of_keywords(&[ + Keyword::AGGREGATION, + Keyword::TAG, + Keyword::ROW, + ])?; + parser.prev_token(); + } + Keyword::AGGREGATION => { + parser.expect_keyword(Keyword::POLICY)?; + let aggregation_policy = parser.parse_object_name(false)?; + builder = builder.with_aggregation_policy(Some(aggregation_policy)); + } + 
Keyword::ROW => { + parser.expect_keywords(&[Keyword::ACCESS, Keyword::POLICY])?; + let policy = parser.parse_object_name(false)?; + parser.expect_keyword(Keyword::ON)?; + parser.expect_token(&Token::LParen)?; + let columns = parser.parse_comma_separated(|p| p.parse_identifier(false))?; + parser.expect_token(&Token::RParen)?; + + builder = + builder.with_row_access_policy(Some(RowAccessPolicy::new(policy, columns))) + } + Keyword::TAG => { + fn parse_tag(parser: &mut Parser) -> Result { + let name = parser.parse_identifier(false)?; + parser.expect_token(&Token::Eq)?; + let value = parser.parse_literal_string()?; + + Ok(Tag::new(name, value)) + } + + parser.expect_token(&Token::LParen)?; + let tags = parser.parse_comma_separated(parse_tag)?; + parser.expect_token(&Token::RParen)?; + builder = builder.with_tags(Some(tags)); + } + _ => { + return parser.expected("end of statement", next_token); + } + }, + Token::LParen => { + parser.prev_token(); + let (columns, constraints) = parser.parse_columns()?; + builder = builder.columns(columns).constraints(constraints); + } + Token::EOF => { + if builder.columns.is_empty() { + return Err(ParserError::ParserError( + "unexpected end of input".to_string(), + )); + } + + break; + } + Token::SemiColon => { + if builder.columns.is_empty() { + return Err(ParserError::ParserError( + "unexpected end of input".to_string(), + )); + } + + parser.prev_token(); + break; + } + _ => { + return parser.expected("end of statement", next_token); + } + } + } + + Ok(builder.build()) +} + pub fn parse_create_stage( or_replace: bool, temporary: bool, diff --git a/src/keywords.rs b/src/keywords.rs index e67fffd97..e59e49339 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -70,11 +70,14 @@ define_keywords!( ABORT, ABS, ABSOLUTE, + ACCESS, ACTION, ADD, ADMIN, AFTER, AGAINST, + AGGREGATION, + ALIAS, ALL, ALLOCATE, ALTER, @@ -91,6 +94,7 @@ define_keywords!( AS, ASC, ASENSITIVE, + ASOF, ASSERT, ASYMMETRIC, AT, @@ -137,6 +141,7 @@ define_keywords!( 
CENTURY, CHAIN, CHANGE, + CHANGE_TRACKING, CHANNEL, CHAR, CHARACTER, @@ -200,8 +205,11 @@ define_keywords!( CYCLE, DATA, DATABASE, + DATA_RETENTION_TIME_IN_DAYS, DATE, + DATE32, DATETIME, + DATETIME64, DAY, DAYOFWEEK, DAYOFYEAR, @@ -211,6 +219,7 @@ define_keywords!( DECIMAL, DECLARE, DEFAULT, + DEFAULT_DDL_COLLATION, DEFERRABLE, DEFERRED, DEFINE, @@ -248,6 +257,7 @@ define_keywords!( ELSE, EMPTY, ENABLE, + ENABLE_SCHEMA_EVOLUTION, ENCODING, ENCRYPTION, END, @@ -258,6 +268,7 @@ define_keywords!( ENFORCED, ENGINE, ENUM, + EPHEMERAL, EPOCH, EQUALS, ERROR, @@ -288,10 +299,13 @@ define_keywords!( FILE, FILES, FILE_FORMAT, + FILL, FILTER, FIRST, FIRST_VALUE, + FIXEDSTRING, FLOAT, + FLOAT32, FLOAT4, FLOAT64, FLOAT8, @@ -325,6 +339,7 @@ define_keywords!( GLOBAL, GRANT, GRANTED, + GRANTS, GRAPHVIZ, GROUP, GROUPING, @@ -361,11 +376,16 @@ define_keywords!( INSERT, INSTALL, INT, + INT128, + INT16, INT2, + INT256, + INT32, INT4, INT64, INT8, INTEGER, + INTERPOLATE, INTERSECT, INTERSECTION, INTERVAL, @@ -410,6 +430,7 @@ define_keywords!( LOCKED, LOGIN, LOGS, + LOWCARDINALITY, LOWER, LOW_PRIORITY, MACRO, @@ -418,10 +439,12 @@ define_keywords!( MATCH, MATCHED, MATCHES, + MATCH_CONDITION, MATCH_RECOGNIZE, MATERIALIZED, MAX, MAXVALUE, + MAX_DATA_EXTENSION_TIME_IN_DAYS, MEASURES, MEDIUMINT, MEMBER, @@ -453,6 +476,7 @@ define_keywords!( NATURAL, NCHAR, NCLOB, + NESTED, NEW, NEXT, NO, @@ -473,6 +497,7 @@ define_keywords!( NTH_VALUE, NTILE, NULL, + NULLABLE, NULLIF, NULLS, NUMERIC, @@ -497,6 +522,7 @@ define_keywords!( OR, ORC, ORDER, + ORDINALITY, OUT, OUTER, OUTPUTFORMAT, @@ -506,6 +532,7 @@ define_keywords!( OVERLAY, OVERWRITE, OWNED, + OWNER, PARALLEL, PARAMETER, PARQUET, @@ -526,6 +553,7 @@ define_keywords!( PIVOT, PLACING, PLANS, + POLICY, PORTION, POSITION, POSITION_REGEX, @@ -536,6 +564,7 @@ define_keywords!( PRECISION, PREPARE, PRESERVE, + PREWHERE, PRIMARY, PRIOR, PRIVILEGES, @@ -572,6 +601,7 @@ define_keywords!( RELATIVE, RELAY, RELEASE, + REMOTE, RENAME, REORG, REPAIR, @@ 
-627,6 +657,7 @@ define_keywords!( SESSION_USER, SET, SETS, + SETTINGS, SHARE, SHOW, SIMILAR, @@ -655,6 +686,7 @@ define_keywords!( STDDEV_SAMP, STDIN, STDOUT, + STEP, STORAGE_INTEGRATION, STORED, STRICT, @@ -676,6 +708,7 @@ define_keywords!( TABLE, TABLES, TABLESAMPLE, + TAG, TARGET, TBLPROPERTIES, TEMP, @@ -697,6 +730,7 @@ define_keywords!( TINYINT, TO, TOP, + TOTALS, TRAILING, TRANSACTION, TRANSIENT, @@ -710,8 +744,15 @@ define_keywords!( TRUE, TRUNCATE, TRY_CAST, + TUPLE, TYPE, UESCAPE, + UINT128, + UINT16, + UINT256, + UINT32, + UINT64, + UINT8, UNBOUNDED, UNCACHE, UNCOMMITTED, @@ -818,6 +859,12 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FOR, // for MYSQL PARTITION SELECTION Keyword::PARTITION, + // for Clickhouse PREWHERE + Keyword::PREWHERE, + // for ClickHouse SELECT * FROM t SETTINGS ... + Keyword::SETTINGS, + // for ClickHouse SELECT * FROM t FORMAT... + Keyword::FORMAT, // for Snowflake START WITH .. CONNECT BY Keyword::START, Keyword::CONNECT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a7ec4d093..e927b5a57 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -20,14 +20,18 @@ use alloc::{ vec, vec::Vec, }; -use core::fmt; +use core::{ + fmt::{self, Display}, + str::FromStr, +}; use log::debug; +use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::stmt_create_table::{BigQueryTableConfiguration, CreateTableBuilder}; +use crate::ast::helpers::stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}; use crate::ast::*; use crate::dialect::*; use crate::keywords::{Keyword, ALL_KEYWORDS}; @@ -42,6 +46,9 @@ pub enum ParserError { RecursionLimitExceeded, } +// avoid clippy type_complexity warnings +type ParsedAction = (Keyword, Option>); + // Use `Parser::expected` instead, if possible macro_rules! parser_err { ($MSG:expr, $loc:expr) => { @@ -67,11 +74,11 @@ mod recursion { use super::ParserError; /// Tracks remaining recursion depth. 
This value is decremented on - /// each call to `try_decrease()`, when it reaches 0 an error will + /// each call to [`RecursionCounter::try_decrease()`], when it reaches 0 an error will /// be returned. /// - /// Note: Uses an Rc and Cell in order to satisfy the Rust - /// borrow checker so the automatic DepthGuard decrement a + /// Note: Uses an [`std::rc::Rc`] and [`std::cell::Cell`] in order to satisfy the Rust + /// borrow checker so the automatic [`DepthGuard`] decrement a /// reference to the counter. pub(crate) struct RecursionCounter { remaining_depth: Rc>, @@ -88,7 +95,7 @@ mod recursion { /// Decreases the remaining depth by 1. /// - /// Returns `Err` if the remaining depth falls to 0. + /// Returns [`Err`] if the remaining depth falls to 0. /// /// Returns a [`DepthGuard`] which will adds 1 to the /// remaining depth upon drop; @@ -104,7 +111,7 @@ mod recursion { } } - /// Guard that increass the remaining depth by 1 on drop + /// Guard that increases the remaining depth by 1 on drop pub struct DepthGuard { remaining_depth: Rc>, } @@ -127,7 +134,7 @@ mod recursion { /// Implementation [`RecursionCounter`] if std is NOT available (and does not /// guard against stack overflow). /// - /// Has the same API as the std RecursionCounter implementation + /// Has the same API as the std [`RecursionCounter`] implementation /// but does not actually limit stack depth. pub(crate) struct RecursionCounter {} @@ -143,8 +150,6 @@ mod recursion { pub struct DepthGuard {} } -use recursion::RecursionCounter; - #[derive(PartialEq, Eq)] pub enum IsOptional { Optional, @@ -192,7 +197,7 @@ const DEFAULT_REMAINING_DEPTH: usize = 50; /// nested such that the following declaration is possible: /// `ARRAY>` /// But the tokenizer recognizes the `>>` as a ShiftRight token. 
-/// We work-around that limitation when parsing a data type by accepting +/// We work around that limitation when parsing a data type by accepting /// either a `>` or `>>` token in such cases, remembering which variant we /// matched. /// In the latter case having matched a `>>`, the parent type will not look to @@ -268,17 +273,17 @@ enum ParserState { pub struct Parser<'a> { tokens: Vec, - /// The index of the first unprocessed token in `self.tokens` + /// The index of the first unprocessed token in [`Parser::tokens`]. index: usize, /// The current state of the parser. state: ParserState, - /// The current dialect to use + /// The current dialect to use. dialect: &'a dyn Dialect, /// Additional options that allow you to mix & match behavior /// otherwise constrained to certain dialects (e.g. trailing - /// commas) and/or format of parse (e.g. unescaping) + /// commas) and/or format of parse (e.g. unescaping). options: ParserOptions, - /// ensure the stack does not overflow by limiting recursion depth + /// Ensure the stack does not overflow by limiting recursion depth. recursion_counter: RecursionCounter, } @@ -305,13 +310,12 @@ impl<'a> Parser<'a> { state: ParserState::Normal, dialect, recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH), - options: ParserOptions::default(), + options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()), } } /// Specify the maximum recursion limit while parsing. /// - /// /// [`Parser`] prevents stack overflows by returning /// [`ParserError::RecursionLimitExceeded`] if the parser exceeds /// this depth while processing the query. @@ -336,7 +340,6 @@ impl<'a> Parser<'a> { /// Specify additional parser options /// - /// /// [`Parser`] supports additional options ([`ParserOptions`]) /// that allow you to mix & match behavior otherwise constrained /// to certain dialects (e.g. trailing commas). 
@@ -822,7 +825,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a new expression including wildcard & qualified wildcard + /// Parse a new expression including wildcard & qualified wildcard. pub fn parse_wildcard_expr(&mut self) -> Result { let index = self.index; @@ -865,13 +868,13 @@ impl<'a> Parser<'a> { self.parse_expr() } - /// Parse a new expression + /// Parse a new expression. pub fn parse_expr(&mut self) -> Result { let _guard = self.recursion_counter.try_decrease()?; self.parse_subexpr(0) } - /// Parse tokens until the precedence changes + /// Parse tokens until the precedence changes. pub fn parse_subexpr(&mut self, precedence: u8) -> Result { debug!("parsing expr"); let mut expr = self.parse_prefix()?; @@ -906,8 +909,7 @@ impl<'a> Parser<'a> { Ok(expr) } - /// Get the precedence of the next token - /// With AND, OR, and XOR + /// Get the precedence of the next token, with AND, OR, and XOR. pub fn get_next_interval_precedence(&self) -> Result { let token = self.peek_token(); @@ -942,7 +944,7 @@ impl<'a> Parser<'a> { Ok(Statement::ReleaseSavepoint { name }) } - /// Parse an expression prefix + /// Parse an expression prefix. 
pub fn parse_prefix(&mut self) -> Result { // allow the dialect to override prefix parsing if let Some(prefix) = self.dialect.parse_prefix(self) { @@ -999,6 +1001,7 @@ impl<'a> Parser<'a> { { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -1055,6 +1058,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), + parameters: FunctionArguments::None, args: FunctionArguments::Subquery(query), filter: None, null_treatment: None, @@ -1074,7 +1078,10 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } - // Here `w` is a word, check if it's a part of a multi-part + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { + self.parse_duckdb_map_literal() + } + // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { Token::LParen | Token::Period => { @@ -1204,20 +1211,18 @@ impl<'a> Parser<'a> { Ok(Expr::Value(self.parse_value()?)) } Token::LParen => { - let expr = - if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { - self.prev_token(); - Expr::Subquery(self.parse_boxed_query()?) - } else if let Some(lambda) = self.try_parse_lambda() { - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; + let expr = if let Some(expr) = self.try_parse_expr_sub_query()? 
{ + expr + } else if let Some(lambda) = self.try_parse_lambda() { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => unreachable!(), // parse_comma_separated ensures 1 or more + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; self.expect_token(&Token::RParen)?; if !self.consume_token(&Token::Period) { Ok(expr) @@ -1259,6 +1264,18 @@ impl<'a> Parser<'a> { } } + fn try_parse_expr_sub_query(&mut self) -> Result, ParserError> { + if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_none() + { + return Ok(None); + } + self.prev_token(); + + Ok(Some(Expr::Subquery(self.parse_boxed_query()?))) + } + fn try_parse_lambda(&mut self) -> Option { if !self.dialect.supports_lambda_functions() { return None; @@ -1290,6 +1307,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; return Ok(Expr::Function(Function { name, + parameters: FunctionArguments::None, args: FunctionArguments::Subquery(subquery), filter: None, null_treatment: None, @@ -1298,7 +1316,16 @@ impl<'a> Parser<'a> { })); } - let args = self.parse_function_argument_list()?; + let mut args = self.parse_function_argument_list()?; + let mut parameters = FunctionArguments::None; + // ClickHouse aggregations support parametric functions like `HISTOGRAM(0.5, 0.6)(x, y)` + // which (0.5, 0.6) is a parameter to the function. 
+ if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + parameters = FunctionArguments::List(args); + args = self.parse_function_argument_list()?; + } let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { self.expect_token(&Token::LParen)?; @@ -1347,6 +1374,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, + parameters, args: FunctionArguments::List(args), null_treatment, filter, @@ -1379,6 +1407,7 @@ impl<'a> Parser<'a> { }; Ok(Expr::Function(Function { name, + parameters: FunctionArguments::None, args, filter: None, over: None, @@ -1440,8 +1469,7 @@ impl<'a> Parser<'a> { } } - /// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple - /// expr. + /// Parse a group by expr. Group by expr can be one of group sets, roll up, cube, or simple expr. fn parse_group_by_expr(&mut self) -> Result { if self.dialect.supports_group_by_expr() { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { @@ -1459,6 +1487,11 @@ impl<'a> Parser<'a> { let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; self.expect_token(&Token::RParen)?; Ok(Expr::Rollup(result)) + } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { + // PostgreSQL allow to use empty tuple as a group by expression, + // e.g. `GROUP BY (), name`. Please refer to GROUP BY Clause section in + // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) + Ok(Expr::Tuple(vec![])) } else { self.parse_expr() } @@ -1468,7 +1501,7 @@ impl<'a> Parser<'a> { } } - /// parse a tuple with `(` and `)`. + /// Parse a tuple with `(` and `)`. /// If `lift_singleton` is true, then a singleton tuple is lifted to a tuple of length 1, otherwise it will fail. /// If `allow_empty` is true, then an empty tuple is allowed. 
fn parse_tuple( @@ -1937,13 +1970,11 @@ impl<'a> Parser<'a> { } } - /// Parses fulltext expressions [(1)] + /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] /// /// # Errors /// This method will raise an error if the column list is empty or with invalid identifiers, /// the match expression is not a literal string, or if the search modifier is not valid. - /// - /// [(1)]: Expr::MatchAgainst pub fn parse_match_against(&mut self) -> Result { let columns = self.parse_parenthesized_column_list(Mandatory, false)?; @@ -1988,17 +2019,19 @@ impl<'a> Parser<'a> { }) } - /// Parse an INTERVAL expression. + /// Parse an `INTERVAL` expression. /// /// Some syntactically valid intervals: /// - /// 1. `INTERVAL '1' DAY` - /// 2. `INTERVAL '1-1' YEAR TO MONTH` - /// 3. `INTERVAL '1' SECOND` - /// 4. `INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5)` - /// 5. `INTERVAL '1.1' SECOND (2, 2)` - /// 6. `INTERVAL '1:1' HOUR (5) TO MINUTE (5)` - /// 7. (MySql and BigQuey only):`INTERVAL 1 DAY` + /// ```sql + /// 1. INTERVAL '1' DAY + /// 2. INTERVAL '1-1' YEAR TO MONTH + /// 3. INTERVAL '1' SECOND + /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) + /// 5. INTERVAL '1.1' SECOND (2, 2) + /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) + /// 7. (MySql & BigQuery only): INTERVAL 1 DAY + /// ``` /// /// Note that we do not currently attempt to parse the quoted value. pub fn parse_interval(&mut self) -> Result { @@ -2099,7 +2132,7 @@ impl<'a> Parser<'a> { /// ``` fn parse_bigquery_struct_literal(&mut self) -> Result { let (fields, trailing_bracket) = - self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + self.parse_struct_type_def(Self::parse_struct_field_def)?; if trailing_bracket.0 { return parser_err!("unmatched > in STRUCT literal", self.peek_token().location); } @@ -2194,13 +2227,16 @@ impl<'a> Parser<'a> { )) } - /// Parse a field definition in a BigQuery struct. + /// Parse a field definition in a [struct] or [tuple]. 
/// Syntax: /// /// ```sql /// [field_name] field_type /// ``` - fn parse_big_query_struct_field_def( + /// + /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + fn parse_struct_field_def( &mut self, ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { // Look beyond the next item to infer whether both field name @@ -2227,7 +2263,33 @@ impl<'a> Parser<'a> { )) } - /// DuckDB specific: Parse a duckdb dictionary [1] + /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs. + /// + /// Syntax: + /// + /// ```sql + /// UNION(field_name field_type[,...]) + /// ``` + /// + /// [1]: https://duckdb.org/docs/sql/data_types/union.html + fn parse_union_type_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::UNION)?; + + self.expect_token(&Token::LParen)?; + + let fields = self.parse_comma_separated(|p| { + Ok(UnionField { + field_name: p.parse_identifier(false)?, + field_type: p.parse_data_type()?, + }) + })?; + + self.expect_token(&Token::RParen)?; + + Ok(fields) + } + + /// DuckDB specific: Parse a duckdb [dictionary] /// /// Syntax: /// @@ -2235,7 +2297,7 @@ impl<'a> Parser<'a> { /// {'field_name': expr1[, ... 
]} /// ``` /// - /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs fn parse_duckdb_struct_literal(&mut self) -> Result { self.expect_token(&Token::LBrace)?; @@ -2246,13 +2308,15 @@ impl<'a> Parser<'a> { Ok(Expr::Dictionary(fields)) } - /// Parse a field for a duckdb dictionary [1] + /// Parse a field for a duckdb [dictionary] + /// /// Syntax + /// /// ```sql /// 'name': expr /// ``` /// - /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs fn parse_duckdb_dictionary_field(&mut self) -> Result { let key = self.parse_identifier(false)?; @@ -2266,6 +2330,92 @@ impl<'a> Parser<'a> { }) } + /// DuckDB specific: Parse a duckdb [map] + /// + /// Syntax: + /// + /// ```sql + /// Map {key1: value1[, ... ]} + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_map_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Map(Map { entries: fields })) + } + + /// Parse a field for a duckdb [map] + /// + /// Syntax + /// + /// ```sql + /// key: value + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_field(&mut self) -> Result { + let key = self.parse_expr()?; + + self.expect_token(&Token::Colon)?; + + let value = self.parse_expr()?; + + Ok(MapEntry { + key: Box::new(key), + value: Box::new(value), + }) + } + + /// Parse clickhouse [map] + /// + /// Syntax + /// + /// ```sql + /// Map(key_data_type, value_data_type) + /// ``` + /// + /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map + fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> { + 
self.expect_keyword(Keyword::MAP)?; + self.expect_token(&Token::LParen)?; + let key_data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let value_data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + Ok((key_data_type, value_data_type)) + } + + /// Parse clickhouse [tuple] + /// + /// Syntax + /// + /// ```sql + /// Tuple([field_name] field_type, ...) + /// ``` + /// + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::TUPLE)?; + self.expect_token(&Token::LParen)?; + let mut field_defs = vec![]; + loop { + let (def, _) = self.parse_struct_field_def()?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + + Ok(field_defs) + } + /// For nested types that use the angle bracket syntax, this matches either /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously /// matched `trailing_bracket` argument). 
It returns whether there is a trailing @@ -2300,9 +2450,8 @@ impl<'a> Parser<'a> { return infix; } - let tok = self.next_token(); - - let regular_binary_operator = match &tok.token { + let mut tok = self.next_token(); + let regular_binary_operator = match &mut tok.token { Token::Spaceship => Some(BinaryOperator::Spaceship), Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), @@ -2366,6 +2515,7 @@ impl<'a> Parser<'a> { Token::Question => Some(BinaryOperator::Question), Token::QuestionAnd => Some(BinaryOperator::QuestionAnd), Token::QuestionPipe => Some(BinaryOperator::QuestionPipe), + Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))), Token::Word(w) => match w.keyword { Keyword::AND => Some(BinaryOperator::And), @@ -2469,26 +2619,11 @@ impl<'a> Parser<'a> { } } Keyword::AT => { - // if self.parse_keyword(Keyword::TIME) { - // self.expect_keyword(Keyword::ZONE)?; - if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - let time_zone = self.next_token(); - match time_zone.token { - Token::SingleQuotedString(time_zone) => { - log::trace!("Peek token: {:?}", self.peek_token()); - Ok(Expr::AtTimeZone { - timestamp: Box::new(expr), - time_zone, - }) - } - _ => self.expected( - "Expected Token::SingleQuotedString after AT TIME ZONE", - time_zone, - ), - } - } else { - self.expected("Expected Token::Word after AT", tok) - } + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + Ok(Expr::AtTimeZone { + timestamp: Box::new(expr), + time_zone: Box::new(self.parse_subexpr(precedence)?), + }) } Keyword::NOT | Keyword::IN @@ -2545,35 +2680,12 @@ impl<'a> Parser<'a> { ), } } else if Token::DoubleColon == tok { - let data_type = self.parse_data_type()?; - - let cast_expr = Expr::Cast { + Ok(Expr::Cast { kind: CastKind::DoubleColon, expr: Box::new(expr), - data_type: data_type.clone(), + data_type: self.parse_data_type()?, format: None, - }; - - match data_type { - DataType::Date - | DataType::Datetime(_) - | 
DataType::Timestamp(_, _) - | DataType::Time(_, _) => { - let value = self.parse_optional_time_zone()?; - match value { - Some(Value::SingleQuotedString(tz)) => Ok(Expr::AtTimeZone { - timestamp: Box::new(cast_expr), - time_zone: tz, - }), - None => Ok(cast_expr), - _ => Err(ParserError::ParserError(format!( - "Expected Token::SingleQuotedString after AT TIME ZONE, but found: {}", - value.unwrap() - ))), - } - } - _ => Ok(cast_expr), - } + }) } else if Token::ExclamationMark == tok { // PostgreSQL factorial operation Ok(Expr::UnaryOp { @@ -2582,8 +2694,7 @@ impl<'a> Parser<'a> { }) } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - // parse index - self.parse_array_index(expr) + self.parse_subscript(expr) } else if dialect_of!(self is SnowflakeDialect) { self.prev_token(); self.parse_json_access(expr) @@ -2602,7 +2713,7 @@ impl<'a> Parser<'a> { } } - /// parse the ESCAPE CHAR portion of LIKE, ILIKE, and SIMILAR TO + /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` pub fn parse_escape_char(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::ESCAPE) { Ok(Some(self.parse_literal_string()?)) @@ -2611,18 +2722,87 @@ impl<'a> Parser<'a> { } } - pub fn parse_array_index(&mut self, expr: Expr) -> Result { - let index = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - let mut indexes: Vec = vec![index]; - while self.consume_token(&Token::LBracket) { - let index = self.parse_expr()?; + /// Parses an array subscript like + /// * `[:]` + /// * `[l]` + /// * `[l:]` + /// * `[:u]` + /// * `[l:u]` + /// * `[l:u:s]` + /// + /// Parser is right after `[` + fn parse_subscript_inner(&mut self) -> Result { + // at either `:(rest)` or `:(rest)]` + let lower_bound = if self.consume_token(&Token::Colon) { + None + } else { + Some(self.parse_expr()?) 
+ }; + + // check for end + if self.consume_token(&Token::RBracket) { + if let Some(lower_bound) = lower_bound { + return Ok(Subscript::Index { index: lower_bound }); + }; + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } + + // consume the `:` + if lower_bound.is_some() { + self.expect_token(&Token::Colon)?; + } + + // we are now at either `]`, `(rest)]` + let upper_bound = if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } else { + Some(self.parse_expr()?) + }; + + // check for end + if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride: None, + }); + } + + // we are now at `:]` or `:stride]` + self.expect_token(&Token::Colon)?; + let stride = if self.consume_token(&Token::RBracket) { + None + } else { + Some(self.parse_expr()?) + }; + + if stride.is_some() { self.expect_token(&Token::RBracket)?; - indexes.push(index); } - Ok(Expr::ArrayIndex { - obj: Box::new(expr), - indexes, + + Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride, + }) + } + + /// Parses an array subscript like `[1:3]` + /// + /// Parser is right after `[` + pub fn parse_subscript(&mut self, expr: Expr) -> Result { + let subscript = self.parse_subscript_inner()?; + Ok(Expr::Subscript { + expr: Box::new(expr), + subscript: Box::new(subscript), }) } @@ -2631,7 +2811,7 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double quoted + // path segments in SF dot notation can be unquoted or double-quoted quote_style: quote_style @ (Some('"') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. @@ -2720,7 +2900,7 @@ impl<'a> Parser<'a> { }) } - /// Parses the parens following the `[ NOT ] IN` operator + /// Parses the parens following the `[ NOT ] IN` operator. 
pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { // BigQuery allows `IN UNNEST(array_expression)` // https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators @@ -2757,7 +2937,7 @@ impl<'a> Parser<'a> { Ok(in_op) } - /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed + /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed. pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result { // Stop parsing subexpressions for and on tokens with // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. @@ -2772,7 +2952,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a postgresql casting style which is in the form of `expr::datatype` + /// Parse a postgresql casting style which is in the form of `expr::datatype`. pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { Ok(Expr::Cast { kind: CastKind::DoubleColon, @@ -2782,12 +2962,16 @@ impl<'a> Parser<'a> { }) } - // use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference + // Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference // higher number = higher precedence + // + // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator + // actually has higher precedence than addition. + // See https://postgrespro.com/list/thread-id/2673331. 
+ const AT_TZ_PREC: u8 = 41; const MUL_DIV_MOD_OP_PREC: u8 = 40; const PLUS_MINUS_PREC: u8 = 30; const XOR_PREC: u8 = 24; - const TIME_ZONE_PREC: u8 = 20; const BETWEEN_PREC: u8 = 20; const LIKE_PREC: u8 = 19; const IS_PREC: u8 = 17; @@ -2817,7 +3001,7 @@ impl<'a> Parser<'a> { (Token::Word(w), Token::Word(w2)) if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => { - Ok(Self::TIME_ZONE_PREC) + Ok(Self::AT_TZ_PREC) } _ => Ok(0), } @@ -2826,7 +3010,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token { // The precedence of NOT varies depending on keyword that // follows it. If it is followed by IN, BETWEEN, or LIKE, - // it takes on the precedence of those tokens. Otherwise it + // it takes on the precedence of those tokens. Otherwise, it // is not an infix operator, and therefore has zero // precedence. Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), @@ -2872,7 +3056,7 @@ impl<'a> Parser<'a> { Ok(Self::MUL_DIV_MOD_OP_PREC) } Token::DoubleColon => Ok(50), - Token::Colon => Ok(50), + Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50), Token::ExclamationMark => Ok(50), Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50), Token::Arrow @@ -2886,7 +3070,8 @@ impl<'a> Parser<'a> { | Token::AtAt | Token::Question | Token::QuestionAnd - | Token::QuestionPipe => Ok(Self::PG_OTHER_PREC), + | Token::QuestionPipe + | Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC), _ => Ok(0), } } @@ -3034,7 +3219,7 @@ impl<'a> Parser<'a> { /// Report `found` was encountered instead of `expected` pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { parser_err!( - format!("Expected {expected}, found: {found}"), + format!("Expected: {expected}, found: {found}"), found.location ) } @@ -3096,7 +3281,7 @@ impl<'a> Parser<'a> { /// If the current token is one of the given `keywords`, consume the token /// and return the keyword that matches. 
Otherwise, no tokens are consumed - /// and returns `None`. + /// and returns [`None`]. #[must_use] pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { match self.peek_token().token { @@ -3128,7 +3313,7 @@ impl<'a> Parser<'a> { } /// If the current token is the `expected` keyword, consume the token. - /// Otherwise return an error. + /// Otherwise, return an error. pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> { if self.parse_keyword(expected) { Ok(()) @@ -3181,6 +3366,18 @@ impl<'a> Parser<'a> { } } + fn parse(s: String, loc: Location) -> Result + where + ::Err: Display, + { + s.parse::().map_err(|e| { + ParserError::ParserError(format!( + "Could not parse '{s}' as {}: {e}{loc}", + core::any::type_name::() + )) + }) + } + /// Parse a comma-separated list of 1+ SelectItem pub fn parse_projection(&mut self) -> Result, ParserError> { // BigQuery and Snowflake allow trailing commas, but only in project lists @@ -3191,7 +3388,7 @@ impl<'a> Parser<'a> { // This pattern could be captured better with RAII type semantics, but it's quite a bit of // code to add for just one case, so we'll just do it manually here. 
let old_value = self.options.trailing_commas; - self.options.trailing_commas |= dialect_of!(self is BigQueryDialect | SnowflakeDialect); + self.options.trailing_commas |= self.dialect.supports_projection_trailing_commas(); let ret = self.parse_comma_separated(|p| p.parse_select_item()); self.options.trailing_commas = old_value; @@ -3199,23 +3396,15 @@ impl<'a> Parser<'a> { ret } - /// Parse a comma-separated list of 1+ items accepted by `F` - pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { + pub fn parse_actions_list(&mut self) -> Result, ParserError> { let mut values = vec![]; loop { - values.push(f(self)?); + values.push(self.parse_grant_permission()?); if !self.consume_token(&Token::Comma) { break; } else if self.options.trailing_commas { match self.peek_token().token { - Token::Word(kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS - .iter() - .any(|d| kw.keyword == *d) => - { + Token::Word(kw) if kw.keyword == Keyword::ON => { break; } Token::RParen @@ -3230,6 +3419,50 @@ impl<'a> Parser<'a> { Ok(values) } + fn parse_comma_separated_end(&mut self) -> Option { + if !self.consume_token(&Token::Comma) { + Some(Token::Comma) + } else if self.options.trailing_commas { + let token = self.peek_token().token; + match token { + Token::Word(ref kw) if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => { + Some(token) + } + Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { + Some(token) + } + _ => None, + } + } else { + None + } + } + + /// Parse a comma-separated list of 1+ items accepted by `F` + pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result, + { + let mut values = vec![]; + loop { + values.push(f(self)?); + if self.parse_comma_separated_end().is_some() { + break; + } + } + Ok(values) + } + + pub fn parse_parenthesized(&mut self, mut f: F) -> Result + where + F: FnMut(&mut 
Parser<'a>) -> Result, + { + self.expect_token(&Token::LParen)?; + let res = f(self)?; + self.expect_token(&Token::RParen)?; + Ok(res) + } + /// Parse a comma-separated list of 0+ items accepted by `F` pub fn parse_comma_separated0(&mut self, f: F) -> Result, ParserError> where @@ -3250,8 +3483,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated(f) } - /// Run a parser method `f`, reverting back to the current position - /// if unsuccessful. + /// Run a parser method `f`, reverting back to the current position if unsuccessful. #[must_use] fn maybe_parse(&mut self, mut f: F) -> Option where @@ -3266,8 +3498,8 @@ impl<'a> Parser<'a> { } } - /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed - /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found. + /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed + /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { let loc = self.peek_token().location; let all = self.parse_keyword(Keyword::ALL); @@ -3507,21 +3739,13 @@ impl<'a> Parser<'a> { /// Parse a UNCACHE TABLE statement pub fn parse_uncache_table(&mut self) -> Result { - let has_table = self.parse_keyword(Keyword::TABLE); - if has_table { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; - if self.peek_token().token == Token::EOF { - Ok(Statement::UNCache { - table_name, - if_exists, - }) - } else { - self.expected("an `EOF`", self.peek_token()) - } - } else { - self.expected("a `TABLE` keyword", self.peek_token()) - } + self.expect_keyword(Keyword::TABLE)?; + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + Ok(Statement::UNCache { + table_name, + if_exists, + }) } /// SQLite-specific `CREATE VIRTUAL TABLE` @@ -3624,95 +3848,53 @@ 
impl<'a> Parser<'a> { temporary: bool, ) -> Result { if dialect_of!(self is HiveDialect) { - let name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::AS)?; - let class_name = self.parse_function_definition()?; - let params = CreateFunctionBody { - as_: Some(class_name), - using: self.parse_optional_create_function_using()?, - ..Default::default() - }; - - Ok(Statement::CreateFunction { - or_replace, - temporary, - name, - args: None, - return_type: None, - params, - }) + self.parse_hive_create_function(or_replace, temporary) } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = if self.consume_token(&Token::RParen) { - self.prev_token(); - None - } else { - Some(self.parse_comma_separated(Parser::parse_function_arg)?) - }; - - self.expect_token(&Token::RParen)?; - - let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) - } else { - None - }; - - let params = self.parse_create_function_body()?; - - Ok(Statement::CreateFunction { - or_replace, - temporary, - name, - args, - return_type, - params, - }) + self.parse_postgres_create_function(or_replace, temporary) } else if dialect_of!(self is DuckDbDialect) { self.parse_create_macro(or_replace, temporary) + } else if dialect_of!(self is BigQueryDialect) { + self.parse_bigquery_create_function(or_replace, temporary) } else { self.prev_token(); self.expected("an object type after CREATE", self.peek_token()) } } - fn parse_function_arg(&mut self) -> Result { - let mode = if self.parse_keyword(Keyword::IN) { - Some(ArgMode::In) - } else if self.parse_keyword(Keyword::OUT) { - Some(ArgMode::Out) - } else if self.parse_keyword(Keyword::INOUT) { - Some(ArgMode::InOut) - } else { + /// Parse `CREATE FUNCTION` for [Postgres] + /// + /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html + fn parse_postgres_create_function( + &mut self, + 
or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + self.prev_token(); None + } else { + Some(self.parse_comma_separated(Parser::parse_function_arg)?) }; - // parse: [ argname ] argtype - let mut name = None; - let mut data_type = self.parse_data_type()?; - if let DataType::Custom(n, _) = &data_type { - // the first token is actually a name - name = Some(n.0[0].clone()); - data_type = self.parse_data_type()?; - } + self.expect_token(&Token::RParen)?; - let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) - { - Some(self.parse_expr()?) + let return_type = if self.parse_keyword(Keyword::RETURNS) { + Some(self.parse_data_type()?) } else { None }; - Ok(OperateFunctionArg { - mode, - name, - data_type, - default_expr, - }) - } - fn parse_create_function_body(&mut self) -> Result { - let mut body = CreateFunctionBody::default(); + #[derive(Default)] + struct Body { + language: Option, + behavior: Option, + function_body: Option, + called_on_null: Option, + parallel: Option, + } + let mut body = Body::default(); loop { fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { if field.is_some() { @@ -3723,8 +3905,10 @@ impl<'a> Parser<'a> { Ok(()) } if self.parse_keyword(Keyword::AS) { - ensure_not_set(&body.as_, "AS")?; - body.as_ = Some(self.parse_function_definition()?); + ensure_not_set(&body.function_body, "AS")?; + body.function_body = Some(CreateFunctionBody::AsBeforeOptions( + self.parse_create_function_body_string()?, + )); } else if self.parse_keyword(Keyword::LANGUAGE) { ensure_not_set(&body.language, "LANGUAGE")?; body.language = Some(self.parse_identifier(false)?); @@ -3778,12 +3962,186 @@ impl<'a> Parser<'a> { return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); } } else if self.parse_keyword(Keyword::RETURN) { - 
ensure_not_set(&body.return_, "RETURN")?; - body.return_ = Some(self.parse_expr()?); + ensure_not_set(&body.function_body, "RETURN")?; + body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); + } else { + break; + } + } + + Ok(Statement::CreateFunction { + or_replace, + temporary, + name, + args, + return_type, + behavior: body.behavior, + called_on_null: body.called_on_null, + parallel: body.parallel, + language: body.language, + function_body: body.function_body, + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + } + + /// Parse `CREATE FUNCTION` for [Hive] + /// + /// [Hive]: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction + fn parse_hive_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::AS)?; + + let as_ = self.parse_create_function_body_string()?; + let using = self.parse_optional_create_function_using()?; + + Ok(Statement::CreateFunction { + or_replace, + temporary, + name, + function_body: Some(CreateFunctionBody::AsBeforeOptions(as_)), + using, + if_not_exists: false, + args: None, + return_type: None, + behavior: None, + called_on_null: None, + parallel: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + } + + /// Parse `CREATE FUNCTION` for [BigQuery] + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement + fn parse_bigquery_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + + let parse_function_param = + |parser: &mut Parser| -> Result { + let name = parser.parse_identifier(false)?; + let 
data_type = parser.parse_data_type()?; + Ok(OperateFunctionArg { + mode: None, + name: Some(name), + data_type, + default_expr: None, + }) + }; + self.expect_token(&Token::LParen)?; + let args = self.parse_comma_separated0(parse_function_param)?; + self.expect_token(&Token::RParen)?; + + let return_type = if self.parse_keyword(Keyword::RETURNS) { + Some(self.parse_data_type()?) + } else { + None + }; + + let determinism_specifier = if self.parse_keyword(Keyword::DETERMINISTIC) { + Some(FunctionDeterminismSpecifier::Deterministic) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::DETERMINISTIC]) { + Some(FunctionDeterminismSpecifier::NotDeterministic) + } else { + None + }; + + let language = if self.parse_keyword(Keyword::LANGUAGE) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let remote_connection = + if self.parse_keywords(&[Keyword::REMOTE, Keyword::WITH, Keyword::CONNECTION]) { + Some(self.parse_object_name(false)?) + } else { + None + }; + + // `OPTIONS` may come before of after the function body but + // may be specified at most once. 
+ let mut options = self.maybe_parse_options(Keyword::OPTIONS)?; + + let function_body = if remote_connection.is_none() { + self.expect_keyword(Keyword::AS)?; + let expr = self.parse_expr()?; + if options.is_none() { + options = self.maybe_parse_options(Keyword::OPTIONS)?; + Some(CreateFunctionBody::AsBeforeOptions(expr)) } else { - return Ok(body); + Some(CreateFunctionBody::AsAfterOptions(expr)) } + } else { + None + }; + + Ok(Statement::CreateFunction { + or_replace, + temporary, + if_not_exists, + name, + args: Some(args), + return_type, + function_body, + language, + determinism_specifier, + options, + remote_connection, + using: None, + behavior: None, + called_on_null: None, + parallel: None, + }) + } + + fn parse_function_arg(&mut self) -> Result { + let mode = if self.parse_keyword(Keyword::IN) { + Some(ArgMode::In) + } else if self.parse_keyword(Keyword::OUT) { + Some(ArgMode::Out) + } else if self.parse_keyword(Keyword::INOUT) { + Some(ArgMode::InOut) + } else { + None + }; + + // parse: [ argname ] argtype + let mut name = None; + let mut data_type = self.parse_data_type()?; + if let DataType::Custom(n, _) = &data_type { + // the first token is actually a name + name = Some(n.0[0].clone()); + data_type = self.parse_data_type()?; } + + let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) + { + Some(self.parse_expr()?) + } else { + None + }; + Ok(OperateFunctionArg { + mode, + name, + data_type, + default_expr, + }) } pub fn parse_create_macro( @@ -3927,16 +4285,34 @@ impl<'a> Parser<'a> { }; if dialect_of!(self is BigQueryDialect | GenericDialect) { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - let opts = self.parse_options(Keyword::OPTIONS)?; - if !opts.is_empty() { - options = CreateTableOptions::Options(opts); - } + if let Some(opts) = self.maybe_parse_options(Keyword::OPTIONS)? 
{ + if !opts.is_empty() { + options = CreateTableOptions::Options(opts); } }; } + let to = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::TO) + { + Some(self.parse_object_name(false)?) + } else { + None + }; + + let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::COMMENT) + { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(str), + _ => self.expected("string literal", next_token)?, + } + } else { + None + }; + self.expect_keyword(Keyword::AS)?; let query = self.parse_boxed_query()?; // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. @@ -3957,9 +4333,11 @@ impl<'a> Parser<'a> { or_replace, options, cluster_by, + comment, with_no_schema_binding, if_not_exists, temporary, + to, }) } @@ -4210,15 +4588,17 @@ impl<'a> Parser<'a> { ObjectType::Stage } else if self.parse_keyword(Keyword::FUNCTION) { return self.parse_drop_function(); + } else if self.parse_keyword(Keyword::PROCEDURE) { + return self.parse_drop_procedure(); } else if self.parse_keyword(Keyword::SECRET) { return self.parse_drop_secret(temporary, persistent); } else { return self.expected( - "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, STAGE or SEQUENCE after DROP", + "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, PROCEDURE, STAGE or SEQUENCE after DROP", self.peek_token(), ); }; - // Many dialects support the non standard `IF EXISTS` clause and allow + // Many dialects support the non-standard `IF EXISTS` clause and allow // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; @@ -4266,6 +4646,26 @@ impl<'a> Parser<'a> { }) } + /// ```sql + /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] 
+ /// [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_procedure(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let proc_desc = self.parse_comma_separated(Parser::parse_drop_function_desc)?; + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + Some(_) => unreachable!(), // parse_one_of_keywords does not return other keywords + None => None, + }; + Ok(Statement::DropProcedure { + if_exists, + proc_desc, + option, + }) + } + fn parse_drop_function_desc(&mut self) -> Result { let name = self.parse_object_name(false)?; @@ -4532,7 +4932,7 @@ impl<'a> Parser<'a> { continue; } _ => { - // Put back the semi-colon, this is the end of the DECLARE statement. + // Put back the semicolon, this is the end of the DECLARE statement. self.prev_token(); } } @@ -4774,7 +5174,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::CreateIndex { + Ok(Statement::CreateIndex(CreateIndex { name: index_name, table_name, using, @@ -4785,7 +5185,7 @@ impl<'a> Parser<'a> { include, nulls_distinct, predicate, - }) + })) } pub fn parse_create_extension(&mut self) -> Result { @@ -5028,7 +5428,15 @@ impl<'a> Parser<'a> { self.expect_token(&Token::Eq)?; let next_token = self.next_token(); match next_token.token { - Token::Word(w) => Some(w.value), + Token::Word(w) => { + let name = w.value; + let parameters = if self.peek_token() == Token::LParen { + Some(self.parse_parenthesized_identifiers()?) 
+ } else { + None + }; + Some(TableEngine { name, parameters }) + } _ => self.expected("identifier", next_token)?, } } else { @@ -5039,41 +5447,40 @@ impl<'a> Parser<'a> { let _ = self.consume_token(&Token::Eq); let next_token = self.next_token(); match next_token.token { - Token::Number(s, _) => Some(s.parse::().expect("literal int")), + Token::Number(s, _) => Some(Self::parse::(s, next_token.location)?), _ => self.expected("literal int", next_token)?, } } else { None }; + // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` + // https://clickhouse.com/docs/en/sql-reference/statements/create/table#primary-key + let primary_key = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { let columns = if self.peek_token() != Token::RParen { - self.parse_comma_separated(|p| p.parse_identifier(false))? + self.parse_comma_separated(|p| p.parse_expr())? } else { vec![] }; self.expect_token(&Token::RParen)?; - Some(columns) + Some(OneOrManyWithParens::Many(columns)) } else { - Some(vec![self.parse_identifier(false)?]) + Some(OneOrManyWithParens::One(self.parse_expr()?)) } } else { None }; - let big_query_config = if dialect_of!(self is BigQueryDialect | GenericDialect) { - self.parse_optional_big_query_create_table_config()? - } else { - Default::default() - }; - - // Parse optional `AS ( query )` - let query = if self.parse_keyword(Keyword::AS) { - Some(self.parse_boxed_query()?) 
- } else { - None - }; + let create_table_config = self.parse_optional_create_table_config()?; let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { self.expect_token(&Token::Eq)?; @@ -5120,13 +5527,20 @@ impl<'a> Parser<'a> { let _ = self.consume_token(&Token::Eq); let next_token = self.next_token(); match next_token.token { - Token::SingleQuotedString(str) => Some(str), + Token::SingleQuotedString(str) => Some(CommentDef::WithoutEq(str)), _ => self.expected("comment", next_token)?, } } else { None }; + // Parse optional `AS ( query )` + let query = if self.parse_keyword(Keyword::AS) { + Some(self.parse_boxed_query()?) + } else { + None + }; + Ok(CreateTableBuilder::new(table_name) .temporary(temporary) .columns(columns) @@ -5151,36 +5565,46 @@ impl<'a> Parser<'a> { .collation(collation) .on_commit(on_commit) .on_cluster(on_cluster) - .partition_by(big_query_config.partition_by) - .cluster_by(big_query_config.cluster_by) - .options(big_query_config.options) + .partition_by(create_table_config.partition_by) + .cluster_by(create_table_config.cluster_by) + .options(create_table_config.options) + .primary_key(primary_key) .strict(strict) .build()) } - /// Parse configuration like partitioning, clustering information during big-query table creation. - /// - fn parse_optional_big_query_create_table_config( + /// Parse configuration like partitioning, clustering information during the table creation. 
+ /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) + /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) + fn parse_optional_create_table_config( &mut self, - ) -> Result { - let mut partition_by = None; - if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - partition_by = Some(Box::new(self.parse_expr()?)); + ) -> Result { + let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None }; let mut cluster_by = None; - if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - cluster_by = Some(self.parse_comma_separated(|p| p.parse_identifier(false))?); - }; + let mut options = None; + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + cluster_by = Some(WrappedCollection::NoWrapping( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + }; - let mut options = None; - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = Some(self.parse_options(Keyword::OPTIONS)?); - } - }; + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = Some(self.parse_options(Keyword::OPTIONS)?); + } + }; + } - Ok(BigQueryTableConfiguration { + Ok(CreateTableConfiguration { partition_by, cluster_by, options, @@ -5224,12 +5648,17 @@ impl<'a> Parser<'a> { } else { return self.expected("column name or constraint definition", self.peek_token()); } + let comma = self.consume_token(&Token::Comma); - if self.consume_token(&Token::RParen) { - // allow a trailing comma, even though it's not in standard - break; - } else if !comma { + let rparen = self.peek_token().token == Token::RParen; + + if !comma && !rparen { return self.expected("',' or ')' after 
column definition", self.peek_token()); + }; + + if rparen && (!comma || self.options.trailing_commas) { + let _ = self.consume_token(&Token::RParen); + break; } } @@ -5324,6 +5753,24 @@ impl<'a> Parser<'a> { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { Ok(Some(ColumnOption::Default(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::MATERIALIZED) + { + Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::ALIAS) + { + Ok(Some(ColumnOption::Alias(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::EPHEMERAL) + { + // The expression is optional for the EPHEMERAL syntax, so we need to check + // if the column definition has remaining tokens before parsing the expression. + if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + Ok(Some(ColumnOption::Ephemeral(None))) + } else { + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) + } } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { let characteristics = self.parse_constraint_characteristics()?; Ok(Some(ColumnOption::Unique { @@ -5714,6 +6161,18 @@ impl<'a> Parser<'a> { } } + pub fn maybe_parse_options( + &mut self, + keyword: Keyword, + ) -> Result>, ParserError> { + if let Token::Word(word) = self.peek_token().token { + if word.keyword == keyword { + return Ok(Some(self.parse_options(keyword)?)); + } + }; + Ok(None) + } + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; @@ -6073,6 +6532,25 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::WITH)?; let table_name = self.parse_object_name(false)?; AlterTableOperation::SwapWith { table_name } + } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) 
+ && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) + { + let new_owner = match self.parse_one_of_keywords( &[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { + Some(Keyword::CURRENT_USER) => Owner::CurrentUser, + Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, + Some(Keyword::SESSION_USER) => Owner::SessionUser, + Some(_) => unreachable!(), + None => { + match self.parse_identifier(false) { + Ok(ident) => Owner::Ident(ident), + Err(e) => { + return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}"))) + } + } + }, + }; + + AlterTableOperation::OwnerTo { new_owner } } else { let options: Vec = self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; @@ -6182,6 +6660,7 @@ impl<'a> Parser<'a> { } else { Ok(Statement::Call(Function { name: object_name, + parameters: FunctionArguments::None, args: FunctionArguments::None, over: None, filter: None, @@ -6453,10 +6932,7 @@ impl<'a> Parser<'a> { // The call to n.parse() returns a bigdecimal when the // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). 
- Token::Number(ref n, l) => match n.parse() { - Ok(n) => Ok(Value::Number(n, l)), - Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location), - }, + Token::Number(n, l) => Ok(Value::Number(Self::parse(n, location)?, l)), Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), Token::TripleSingleQuotedString(ref s) => { @@ -6548,26 +7024,27 @@ impl<'a> Parser<'a> { pub fn parse_literal_uint(&mut self) -> Result { let next_token = self.next_token(); match next_token.token { - Token::Number(s, _) => s.parse::().map_err(|e| { - ParserError::ParserError(format!("Could not parse '{s}' as u64: {e}")) - }), + Token::Number(s, _) => Self::parse::(s, next_token.location), _ => self.expected("literal int", next_token), } } - pub fn parse_function_definition(&mut self) -> Result { + /// Parse the body of a `CREATE FUNCTION` specified as a string. + /// e.g. `CREATE FUNCTION ... AS $$ body $$`. 
+ fn parse_create_function_body_string(&mut self) -> Result { let peek_token = self.peek_token(); match peek_token.token { - Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + Token::DollarQuotedString(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { self.next_token(); - Ok(FunctionDefinition::DoubleDollarDef(value.value)) + Ok(Expr::Value(Value::DollarQuotedString(s))) } - _ => Ok(FunctionDefinition::SingleQuotedDef( + _ => Ok(Expr::Value(Value::SingleQuotedString( self.parse_literal_string()?, - )), + ))), } } + /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { let next_token = self.next_token(); @@ -6611,6 +7088,7 @@ impl<'a> Parser<'a> { Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT32 => Ok(DataType::Float32), Keyword::FLOAT64 => Ok(DataType::Float64), Keyword::FLOAT8 => Ok(DataType::Float8), Keyword::DOUBLE => { @@ -6668,7 +7146,19 @@ impl<'a> Parser<'a> { Ok(DataType::Int4(optional_precision?)) } } + Keyword::INT8 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt8(optional_precision?)) + } else { + Ok(DataType::Int8(optional_precision?)) + } + } + Keyword::INT16 => Ok(DataType::Int16), + Keyword::INT32 => Ok(DataType::Int32), Keyword::INT64 => Ok(DataType::Int64), + Keyword::INT128 => Ok(DataType::Int128), + Keyword::INT256 => Ok(DataType::Int256), Keyword::INTEGER => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -6685,14 +7175,12 @@ impl<'a> Parser<'a> { Ok(DataType::BigInt(optional_precision?)) } } - Keyword::INT8 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt8(optional_precision?)) - } else { - 
Ok(DataType::Int8(optional_precision?)) - } - } + Keyword::UINT8 => Ok(DataType::UInt8), + Keyword::UINT16 => Ok(DataType::UInt16), + Keyword::UINT32 => Ok(DataType::UInt32), + Keyword::UINT64 => Ok(DataType::UInt64), + Keyword::UINT128 => Ok(DataType::UInt128), + Keyword::UINT256 => Ok(DataType::UInt256), Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), Keyword::NVARCHAR => { Ok(DataType::Nvarchar(self.parse_optional_character_length()?)) @@ -6728,7 +7216,13 @@ impl<'a> Parser<'a> { Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), + Keyword::DATE32 => Ok(DataType::Date32), Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), + Keyword::DATETIME64 => { + self.prev_token(); + let (precision, time_zone) = self.parse_datetime_64()?; + Ok(DataType::Datetime64(precision, time_zone)) + } Keyword::TIMESTAMP => { let precision = self.parse_optional_precision()?; let tz = if self.parse_keyword(Keyword::WITH) { @@ -6771,6 +7265,12 @@ impl<'a> Parser<'a> { Keyword::JSONB => Ok(DataType::JSONB), Keyword::REGCLASS => Ok(DataType::Regclass), Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), + Keyword::FIXEDSTRING => { + self.expect_token(&Token::LParen)?; + let character_length = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Ok(DataType::FixedString(character_length)) + } Keyword::TEXT => Ok(DataType::Text), Keyword::BYTEA => Ok(DataType::Bytea), Keyword::NUMERIC => Ok(DataType::Numeric( @@ -6793,6 +7293,10 @@ impl<'a> Parser<'a> { Keyword::ARRAY => { if dialect_of!(self is SnowflakeDialect) { Ok(DataType::Array(ArrayElemTypeDef::None)) + } else if dialect_of!(self is ClickHouseDialect) { + Ok(self.parse_sub_type(|internal_type| { + DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type)) + })?) 
} else { self.expect_token(&Token::Lt)?; let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; @@ -6805,10 +7309,40 @@ impl<'a> Parser<'a> { Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { self.prev_token(); let (field_defs, _trailing_bracket) = - self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + self.parse_struct_type_def(Self::parse_struct_field_def)?; trailing_bracket = _trailing_bracket; Ok(DataType::Struct(field_defs)) } + Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.prev_token(); + let fields = self.parse_union_type_def()?; + Ok(DataType::Union(fields)) + } + Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::Nullable)?) + } + Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::LowCardinality)?) + } + Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let (key_data_type, value_data_type) = self.parse_click_house_map_def()?; + Ok(DataType::Map( + Box::new(key_data_type), + Box::new(value_data_type), + )) + } + Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.expect_token(&Token::LParen)?; + let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(DataType::Nested(field_defs)) + } + Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let field_defs = self.parse_click_house_tuple_def()?; + Ok(DataType::Tuple(field_defs)) + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; @@ -6894,7 +7428,7 @@ impl<'a> Parser<'a> { // ignore the and treat the multiple strings as // a single ." 
Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), - // Support for MySql dialect double quoted string, `AS "HOUR"` for example + // Support for MySql dialect double-quoted string, `AS "HOUR"` for example Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))), _ => { if after_as { @@ -7156,7 +7690,16 @@ impl<'a> Parser<'a> { } else { None }; - Ok(ViewColumnDef { name, options }) + let data_type = if dialect_of!(self is ClickHouseDialect) { + Some(self.parse_data_type()?) + } else { + None + }; + Ok(ViewColumnDef { + name, + data_type, + options, + }) } /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers @@ -7198,6 +7741,26 @@ impl<'a> Parser<'a> { } } + /// Parse datetime64 [1] + /// Syntax + /// ```sql + /// DateTime64(precision[, timezone]) + /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + pub fn parse_datetime_64(&mut self) -> Result<(u64, Option), ParserError> { + self.expect_keyword(Keyword::DATETIME64)?; + self.expect_token(&Token::LParen)?; + let precision = self.parse_literal_uint()?; + let time_zone = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_string()?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok((precision, time_zone)) + } + pub fn parse_optional_character_length( &mut self, ) -> Result, ParserError> { @@ -7290,6 +7853,17 @@ impl<'a> Parser<'a> { } } + /// Parse a parenthesized sub data type + fn parse_sub_type(&mut self, parent_type: F) -> Result + where + F: FnOnce(Box) -> DataType, + { + self.expect_token(&Token::LParen)?; + let inside_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(parent_type(inside_type.into())) + } + pub fn parse_delete(&mut self) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. 
@@ -7418,7 +7992,7 @@ impl<'a> Parser<'a> { /// This function can be used to reduce the stack size required in debug /// builds. Instead of `sizeof(Query)` only a pointer (`Box`) /// is used. - fn parse_boxed_query(&mut self) -> Result, ParserError> { + pub fn parse_boxed_query(&mut self) -> Result, ParserError> { self.parse_query().map(Box::new) } @@ -7442,11 +8016,13 @@ impl<'a> Parser<'a> { body: self.parse_insert_setexpr_boxed()?, limit: None, limit_by: vec![], - order_by: vec![], + order_by: None, offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }) } else if self.parse_keyword(Keyword::UPDATE) { Ok(Query { @@ -7454,19 +8030,31 @@ impl<'a> Parser<'a> { body: self.parse_update_setexpr_boxed()?, limit: None, limit_by: vec![], - order_by: vec![], + order_by: None, offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }) } else { let body = self.parse_boxed_query_body(0)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? + let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { + self.parse_interpolations()? + } else { + None + }; + + Some(OrderBy { + exprs: order_by_exprs, + interpolate, + }) } else { - vec![] + None }; let mut limit = None; @@ -7504,6 +8092,8 @@ impl<'a> Parser<'a> { vec![] }; + let settings = self.parse_settings()?; + let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?) 
} else { @@ -7520,6 +8110,18 @@ impl<'a> Parser<'a> { locks.push(self.parse_lock()?); } } + let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::FORMAT) + { + if self.parse_keyword(Keyword::NULL) { + Some(FormatClause::Null) + } else { + let ident = self.parse_identifier(false)?; + Some(FormatClause::Identifier(ident)) + } + } else { + None + }; Ok(Query { with, @@ -7531,10 +8133,29 @@ impl<'a> Parser<'a> { fetch, locks, for_clause, + settings, + format_clause, }) } } + fn parse_settings(&mut self) -> Result>, ParserError> { + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + Ok(settings) + } + /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause pub fn parse_for_clause(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::XML) { @@ -7714,7 +8335,7 @@ impl<'a> Parser<'a> { pub fn parse_query_body(&mut self, precedence: u8) -> Result { // We parse the expression using a Pratt parser, as in `parse_expr()`. // Start by parsing a restricted SELECT or a `(subquery)`: - let mut expr = if self.parse_keyword(Keyword::SELECT) { + let expr = if self.parse_keyword(Keyword::SELECT) { SetExpr::Select(self.parse_select().map(Box::new)?) 
} else if self.consume_token(&Token::LParen) { // CTEs are not allowed here, but the parser currently accepts them @@ -7733,6 +8354,17 @@ impl<'a> Parser<'a> { ); }; + self.parse_remaining_set_exprs(expr, precedence) + } + + /// Parse any extra set expressions that may be present in a query body + /// + /// (this is its own function to reduce required stack size in debug builds) + fn parse_remaining_set_exprs( + &mut self, + mut expr: SetExpr, + precedence: u8, + ) -> Result { loop { // The query can be optionally followed by a set operator: let op = self.parse_set_operator(&self.peek_token().token); @@ -7771,7 +8403,7 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { - Some(SetOperator::Union) => { + Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { SetQuantifier::DistinctByName } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { @@ -7788,15 +8420,6 @@ impl<'a> Parser<'a> { SetQuantifier::None } } - Some(SetOperator::Except) | Some(SetOperator::Intersect) => { - if self.parse_keyword(Keyword::ALL) { - SetQuantifier::All - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - SetQuantifier::None - } - } _ => SetQuantifier::None, } } @@ -7886,6 +8509,14 @@ impl<'a> Parser<'a> { } } + let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::PREWHERE) + { + Some(self.parse_expr()?) + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) 
} else { @@ -7893,13 +8524,42 @@ impl<'a> Parser<'a> { }; let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { - if self.parse_keyword(Keyword::ALL) { - GroupByExpr::All + let expressions = if self.parse_keyword(Keyword::ALL) { + None } else { - GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?) + Some(self.parse_comma_separated(Parser::parse_group_by_expr)?) + }; + + let mut modifiers = vec![]; + if dialect_of!(self is ClickHouseDialect | GenericDialect) { + loop { + if !self.parse_keyword(Keyword::WITH) { + break; + } + let keyword = self.expect_one_of_keywords(&[ + Keyword::ROLLUP, + Keyword::CUBE, + Keyword::TOTALS, + ])?; + modifiers.push(match keyword { + Keyword::ROLLUP => GroupByWithModifier::Rollup, + Keyword::CUBE => GroupByWithModifier::Cube, + Keyword::TOTALS => GroupByWithModifier::Totals, + _ => { + return parser_err!( + "BUG: expected to match GroupBy modifier keyword", + self.peek_token().location + ) + } + }); + } + } + match expressions { + None => GroupByExpr::All(modifiers), + Some(exprs) => GroupByExpr::Expressions(exprs, modifiers), } } else { - GroupByExpr::Expressions(vec![]) + GroupByExpr::Expressions(vec![], vec![]) }; let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { @@ -7968,6 +8628,7 @@ impl<'a> Parser<'a> { into, from, lateral_views, + prewhere, selection, group_by, cluster_by, @@ -8135,7 +8796,9 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { - let value = if let Ok(expr) = self.parse_expr() { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { expr } else { self.expected("variable value", self.peek_token())? 
@@ -8180,10 +8843,10 @@ impl<'a> Parser<'a> { }) } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { if self.parse_keyword(Keyword::SNAPSHOT) { - let snaphot_id = self.parse_value()?; + let snapshot_id = self.parse_value()?; return Ok(Statement::SetTransaction { modes: vec![], - snapshot: Some(snaphot_id), + snapshot: Some(snapshot_id), session: false, }); } @@ -8370,6 +9033,18 @@ impl<'a> Parser<'a> { relation: self.parse_table_factor()?, join_operator: JoinOperator::OuterApply, } + } else if self.parse_keyword(Keyword::ASOF) { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + self.expect_keyword(Keyword::MATCH_CONDITION)?; + let match_condition = self.parse_parenthesized(Self::parse_expr)?; + Join { + relation, + join_operator: JoinOperator::AsOf { + match_condition, + constraint: self.parse_join_constraint(false)?, + }, + } } else { let natural = self.parse_keyword(Keyword::NATURAL); let peek_keyword = if let Token::Word(w) = self.peek_token().token { @@ -8594,8 +9269,19 @@ impl<'a> Parser<'a> { self.expected("joined table", self.peek_token()) } } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) - && self.parse_keyword(Keyword::VALUES) + && matches!( + self.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::VALUES, + .. + }), + Token::LParen + ] + ) { + self.expect_keyword(Keyword::VALUES)?; + // Snowflake and Databricks allow syntax like below: // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) // where there are no parentheses around the VALUES clause. 
@@ -8606,13 +9292,15 @@ impl<'a> Parser<'a> { subquery: Box::new(Query { with: None, body: Box::new(values), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }), alias, }) @@ -8623,6 +9311,7 @@ impl<'a> Parser<'a> { let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { Ok(Some(alias)) => Some(alias), Ok(None) => None, @@ -8649,6 +9338,7 @@ impl<'a> Parser<'a> { array_exprs, with_offset, with_offset_alias, + with_ordinality, }) } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { let json_expr = self.parse_expr()?; @@ -8672,7 +9362,7 @@ impl<'a> Parser<'a> { let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) && self.parse_keyword(Keyword::PARTITION) { - self.parse_partitions()? + self.parse_parenthesized_identifiers()? } else { vec![] }; @@ -8680,13 +9370,15 @@ impl<'a> Parser<'a> { // Parse potential version qualifier let version = self.parse_table_version()?; - // Postgres, MSSQL: table-valued functions: + // Postgres, MSSQL, ClickHouse: table-valued functions: let args = if self.consume_token(&Token::LParen) { - Some(self.parse_optional_args()?) + Some(self.parse_table_function_args()?) 
} else { None }; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: @@ -8708,6 +9400,7 @@ impl<'a> Parser<'a> { with_hints, version, partitions, + with_ordinality, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { @@ -8805,9 +9498,7 @@ impl<'a> Parser<'a> { }; self.expect_keyword(Keyword::PATTERN)?; - self.expect_token(&Token::LParen)?; - let pattern = self.parse_pattern()?; - self.expect_token(&Token::RParen)?; + let pattern = self.parse_parenthesized(Self::parse_pattern)?; self.expect_keyword(Keyword::DEFINE)?; @@ -8894,7 +9585,7 @@ impl<'a> Parser<'a> { return self.expected("literal number", next_token); }; self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::AtMost(n.parse().expect("literal int")) + RepetitionQuantifier::AtMost(Self::parse(n, token.location)?) } Token::Number(n, _) if self.consume_token(&Token::Comma) => { let next_token = self.next_token(); @@ -8902,12 +9593,12 @@ impl<'a> Parser<'a> { Token::Number(m, _) => { self.expect_token(&Token::RBrace)?; RepetitionQuantifier::Range( - n.parse().expect("literal int"), - m.parse().expect("literal int"), + Self::parse(n, token.location)?, + Self::parse(m, token.location)?, ) } Token::RBrace => { - RepetitionQuantifier::AtLeast(n.parse().expect("literal int")) + RepetitionQuantifier::AtLeast(Self::parse(n, token.location)?) } _ => { return self.expected("} or upper bound", next_token); @@ -8916,7 +9607,7 @@ impl<'a> Parser<'a> { } Token::Number(n, _) => { self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::Exactly(n.parse().expect("literal int")) + RepetitionQuantifier::Exactly(Self::parse(n, token.location)?) 
} _ => return self.expected("quantifier range", token), } @@ -9068,16 +9759,44 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::FOR)?; let value_column = self.parse_object_name(false)?.0; self.expect_keyword(Keyword::IN)?; + self.expect_token(&Token::LParen)?; - let pivot_values = self.parse_comma_separated(Self::parse_expr_with_alias)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_some() + { + self.prev_token(); + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) + }; self.expect_token(&Token::RParen)?; + + let default_on_null = + if self.parse_keywords(&[Keyword::DEFAULT, Keyword::ON, Keyword::NULL]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + self.expect_token(&Token::RParen)?; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::Pivot { table: Box::new(table), aggregate_functions, value_column, - pivot_values, + value_source, + default_on_null, alias, }) } @@ -9150,7 +9869,7 @@ impl<'a> Parser<'a> { } } else { let (actions, err): (Vec<_>, Vec<_>) = self - .parse_comma_separated(Parser::parse_grant_permission)? + .parse_actions_list()? 
.into_iter() .map(|(kw, columns)| match kw { Keyword::DELETE => Ok(Action::Delete), @@ -9217,7 +9936,7 @@ impl<'a> Parser<'a> { Ok((privileges, objects)) } - pub fn parse_grant_permission(&mut self) -> Result<(Keyword, Option>), ParserError> { + pub fn parse_grant_permission(&mut self) -> Result { if let Some(kw) = self.parse_one_of_keywords(&[ Keyword::CONNECT, Keyword::CREATE, @@ -9525,10 +10244,22 @@ impl<'a> Parser<'a> { /// Parse a `var = expr` assignment, used in an UPDATE statement pub fn parse_assignment(&mut self) -> Result { - let id = self.parse_identifiers()?; + let target = self.parse_assignment_target()?; self.expect_token(&Token::Eq)?; let value = self.parse_expr()?; - Ok(Assignment { id, value }) + Ok(Assignment { target, value }) + } + + /// Parse the left-hand side of an assignment, used in an UPDATE statement + pub fn parse_assignment_target(&mut self) -> Result { + if self.consume_token(&Token::LParen) { + let columns = self.parse_comma_separated(|p| p.parse_object_name(false))?; + self.expect_token(&Token::RParen)?; + Ok(AssignmentTarget::Tuple(columns)) + } else { + let column = self.parse_object_name(false)?; + Ok(AssignmentTarget::ColumnName(column)) + } } pub fn parse_function_args(&mut self) -> Result { @@ -9584,6 +10315,30 @@ impl<'a> Parser<'a> { } } + fn parse_table_function_args(&mut self) -> Result { + { + let settings = self.parse_settings()?; + if self.consume_token(&Token::RParen) { + return Ok(TableFunctionArgs { + args: vec![], + settings, + }); + } + } + let mut args = vec![]; + let settings = loop { + if let Some(settings) = self.parse_settings()? { + break Some(settings); + } + args.push(self.parse_function_args()?); + if self.parse_comma_separated_end().is_some() { + break None; + } + }; + self.expect_token(&Token::RParen)?; + Ok(TableFunctionArgs { args, settings }) + } + /// Parses a potentially empty list of arguments to a window function /// (including the closing parenthesis). 
/// @@ -9677,6 +10432,12 @@ impl<'a> Parser<'a> { Expr::Wildcard => Ok(SelectItem::Wildcard( self.parse_wildcard_additional_options()?, )), + Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => { + parser_err!( + format!("Expected an expression, found: {}", v), + self.peek_token().location + ) + } expr => self .parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) .map(|alias| match alias { @@ -9709,15 +10470,14 @@ impl<'a> Parser<'a> { } else { None }; - let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { - self.parse_optional_select_item_rename()? + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) + { + self.parse_optional_select_item_replace()? } else { None }; - - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) - { - self.parse_optional_select_item_replace()? + let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_rename()? } else { None }; @@ -9882,13 +10642,77 @@ impl<'a> Parser<'a> { None }; + let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) + { + Some(self.parse_with_fill()?) + } else { + None + }; + Ok(OrderByExpr { expr, asc, nulls_first, + with_fill, }) } + // Parse a WITH FILL clause (ClickHouse dialect) + // that follow the WITH FILL keywords in a ORDER BY clause + pub fn parse_with_fill(&mut self) -> Result { + let from = if self.parse_keyword(Keyword::FROM) { + Some(self.parse_expr()?) + } else { + None + }; + + let to = if self.parse_keyword(Keyword::TO) { + Some(self.parse_expr()?) + } else { + None + }; + + let step = if self.parse_keyword(Keyword::STEP) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + Ok(WithFill { from, to, step }) + } + + // Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect) + // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier + pub fn parse_interpolations(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::INTERPOLATE) { + return Ok(None); + } + + if self.consume_token(&Token::LParen) { + let interpolations = self.parse_comma_separated0(|p| p.parse_interpolation())?; + self.expect_token(&Token::RParen)?; + // INTERPOLATE () and INTERPOLATE ( ... ) variants + return Ok(Some(Interpolate { + exprs: Some(interpolations), + })); + } + + // INTERPOLATE + Ok(Some(Interpolate { exprs: None })) + } + + // Parse a INTERPOLATE expression (ClickHouse dialect) + pub fn parse_interpolation(&mut self) -> Result { + let column = self.parse_identifier(false)?; + let expr = if self.parse_keyword(Keyword::AS) { + Some(self.parse_expr()?) + } else { + None + }; + Ok(InterpolateExpr { column, expr }) + } + /// Parse a TOP clause, MSSQL equivalent of LIMIT, /// that follows after `SELECT [DISTINCT]`. 
pub fn parse_top(&mut self) -> Result { @@ -9899,7 +10723,7 @@ impl<'a> Parser<'a> { } else { let next_token = self.next_token(); let quantity = match next_token.token { - Token::Number(s, _) => s.parse::().expect("literal int"), + Token::Number(s, _) => Self::parse::(s, next_token.location)?, _ => self.expected("literal int", next_token)?, }; Some(TopQuantity::Constant(quantity)) @@ -10563,7 +11387,7 @@ impl<'a> Parser<'a> { }) } - fn parse_partitions(&mut self) -> Result, ParserError> { + fn parse_parenthesized_identifiers(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(|p| p.parse_identifier(false))?; self.expect_token(&Token::RParen)?; @@ -11141,7 +11965,7 @@ mod tests { assert_eq!( ast, Err(ParserError::TokenizerError( - "Unterminated string literal at Line: 1, Column 5".to_string() + "Unterminated string literal at Line: 1, Column: 5".to_string() )) ); } @@ -11153,7 +11977,7 @@ mod tests { assert_eq!( ast, Err(ParserError::ParserError( - "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16" + "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" .to_string() )) ); diff --git a/src/test_utils.rs b/src/test_utils.rs index 464366ae4..1f5300be1 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -309,6 +309,22 @@ pub fn table(name: impl Into) -> TableFactor { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, + } +} + +pub fn table_with_alias(name: impl Into, alias: impl Into) -> TableFactor { + TableFactor::Table { + name: ObjectName(vec![Ident::new(name)]), + alias: Some(TableAlias { + name: Ident::new(alias), + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, } } @@ -322,6 +338,7 @@ pub fn join(relation: TableFactor) -> Join { pub fn call(function: &str, args: impl IntoIterator) -> Expr { 
Expr::Function(Function { name: ObjectName(vec![Ident::new(function)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: args diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b6fed354d..b8336cec8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -231,6 +231,10 @@ pub enum Token { /// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level /// keys within the jsonb object QuestionPipe, + /// Custom binary operator + /// This is used to represent any custom binary operator that is not part of the SQL standard. + /// PostgreSQL allows defining custom binary operators using CREATE OPERATOR. + CustomBinaryOperator(String), } impl fmt::Display for Token { @@ -320,6 +324,7 @@ impl fmt::Display for Token { Token::Question => write!(f, "?"), Token::QuestionAnd => write!(f, "?&"), Token::QuestionPipe => write!(f, "?|"), + Token::CustomBinaryOperator(s) => f.write_str(s), } } } @@ -424,7 +429,7 @@ impl fmt::Display for Location { write!( f, // TODO: use standard compiler location syntax (::) - " at Line: {}, Column {}", + " at Line: {}, Column: {}", self.line, self.column, ) } @@ -649,7 +654,7 @@ impl<'a> Tokenizer<'a> { Ok(()) } - // Tokenize the identifer or keywords in `ch` + // Tokenize the identifier or keywords in `ch` fn tokenize_identifier_or_keyword( &self, ch: impl IntoIterator, @@ -961,15 +966,12 @@ impl<'a> Tokenizer<'a> { Some('>') => { chars.next(); match chars.peek() { - Some('>') => { - chars.next(); - Ok(Some(Token::LongArrow)) - } - _ => Ok(Some(Token::Arrow)), + Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow), + _ => self.start_binop(chars, "->", Token::Arrow), } } // a regular '-' operator - _ => Ok(Some(Token::Minus)), + _ => self.start_binop(chars, "-", Token::Minus), } } '/' => { @@ -999,26 +1001,28 @@ impl<'a> Tokenizer<'a> { '%' => { chars.next(); // advance past '%' match chars.peek() { - Some(' ') => 
Ok(Some(Token::Mod)), + Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)), Some(sch) if self.dialect.is_identifier_start('%') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } - _ => Ok(Some(Token::Mod)), + _ => self.start_binop(chars, "%", Token::Mod), } } '|' => { chars.next(); // consume the '|' match chars.peek() { - Some('/') => self.consume_and_return(chars, Token::PGSquareRoot), + Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot), Some('|') => { chars.next(); // consume the second '|' match chars.peek() { - Some('/') => self.consume_and_return(chars, Token::PGCubeRoot), - _ => Ok(Some(Token::StringConcat)), + Some('/') => { + self.consume_for_binop(chars, "||/", Token::PGCubeRoot) + } + _ => self.start_binop(chars, "||", Token::StringConcat), } } // Bitshift '|' operator - _ => Ok(Some(Token::Pipe)), + _ => self.start_binop(chars, "|", Token::Pipe), } } '=' => { @@ -1061,22 +1065,22 @@ impl<'a> Tokenizer<'a> { Some('=') => { chars.next(); match chars.peek() { - Some('>') => self.consume_and_return(chars, Token::Spaceship), - _ => Ok(Some(Token::LtEq)), + Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship), + _ => self.start_binop(chars, "<=", Token::LtEq), } } - Some('>') => self.consume_and_return(chars, Token::Neq), - Some('<') => self.consume_and_return(chars, Token::ShiftLeft), - Some('@') => self.consume_and_return(chars, Token::ArrowAt), - _ => Ok(Some(Token::Lt)), + Some('>') => self.consume_for_binop(chars, "<>", Token::Neq), + Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft), + Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt), + _ => self.start_binop(chars, "<", Token::Lt), } } '>' => { chars.next(); // consume match chars.peek() { - Some('=') => self.consume_and_return(chars, Token::GtEq), - Some('>') => self.consume_and_return(chars, Token::ShiftRight), - _ => Ok(Some(Token::Gt)), + Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq), + Some('>') => 
self.consume_for_binop(chars, ">>", Token::ShiftRight), + _ => self.start_binop(chars, ">", Token::Gt), } } ':' => { @@ -1094,9 +1098,12 @@ impl<'a> Tokenizer<'a> { '&' => { chars.next(); // consume the '&' match chars.peek() { - Some('&') => self.consume_and_return(chars, Token::Overlap), + Some('&') => { + chars.next(); // consume the second '&' + self.start_binop(chars, "&&", Token::Overlap) + } // Bitshift '&' operator - _ => Ok(Some(Token::Ampersand)), + _ => self.start_binop(chars, "&", Token::Ampersand), } } '^' => { @@ -1119,38 +1126,37 @@ impl<'a> Tokenizer<'a> { '~' => { chars.next(); // consume match chars.peek() { - Some('*') => self.consume_and_return(chars, Token::TildeAsterisk), + Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk), Some('~') => { chars.next(); match chars.peek() { Some('*') => { - self.consume_and_return(chars, Token::DoubleTildeAsterisk) + self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk) } - _ => Ok(Some(Token::DoubleTilde)), + _ => self.start_binop(chars, "~~", Token::DoubleTilde), } } - _ => Ok(Some(Token::Tilde)), + _ => self.start_binop(chars, "~", Token::Tilde), } } '#' => { chars.next(); match chars.peek() { - Some('-') => self.consume_and_return(chars, Token::HashMinus), + Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus), Some('>') => { chars.next(); match chars.peek() { Some('>') => { - chars.next(); - Ok(Some(Token::HashLongArrow)) + self.consume_for_binop(chars, "#>>", Token::HashLongArrow) } - _ => Ok(Some(Token::HashArrow)), + _ => self.start_binop(chars, "#>", Token::HashArrow), } } Some(' ') => Ok(Some(Token::Sharp)), Some(sch) if self.dialect.is_identifier_start('#') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } - _ => Ok(Some(Token::Sharp)), + _ => self.start_binop(chars, "#", Token::Sharp), } } '@' => { @@ -1206,6 +1212,39 @@ impl<'a> Tokenizer<'a> { } } + /// Consume the next character, then parse a custom binary operator. 
The next character should be included in the prefix + fn consume_for_binop( + &self, + chars: &mut State, + prefix: &str, + default: Token, + ) -> Result, TokenizerError> { + chars.next(); // consume the first char + self.start_binop(chars, prefix, default) + } + + /// parse a custom binary operator + fn start_binop( + &self, + chars: &mut State, + prefix: &str, + default: Token, + ) -> Result, TokenizerError> { + let mut custom = None; + while let Some(&ch) = chars.peek() { + if !self.dialect.is_custom_operator_part(ch) { + break; + } + + custom.get_or_insert_with(|| prefix.to_string()).push(ch); + chars.next(); + } + + Ok(Some( + custom.map(Token::CustomBinaryOperator).unwrap_or(default), + )) + } + /// Tokenize dollar preceded value (i.e: a string/placeholder) fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { let mut s = String::new(); @@ -1777,7 +1816,7 @@ mod tests { use std::error::Error; assert!(err.source().is_none()); } - assert_eq!(err.to_string(), "test at Line: 1, Column 1"); + assert_eq!(err.to_string(), "test at Line: 1, Column: 1"); } #[test] diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 179755e0c..089a41889 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -224,6 +224,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -261,10 +262,12 @@ fn parse_create_view_with_options() { vec![ ViewColumnDef { name: Ident::new("name"), + data_type: None, options: None, }, ViewColumnDef { name: Ident::new("age"), + data_type: None, options: Some(vec![SqlOption { name: Ident::new("description"), value: Expr::Value(Value::DoubleQuotedString("field age".to_string())), @@ -309,9 +312,11 @@ fn parse_create_view_if_not_exists() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + .. 
} => { assert_eq!("mydataset.newview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -320,6 +325,7 @@ fn parse_create_view_if_not_exists() { assert!(!or_replace); assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(if_not_exists); assert!(!temporary); @@ -350,7 +356,7 @@ fn parse_create_view_with_unquoted_hyphen() { fn parse_create_table_with_unquoted_hyphen() { let sql = "CREATE TABLE my-pro-ject.mydataset.mytable (x INT64)"; match bigquery().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!( name, ObjectName(vec![ @@ -384,14 +390,14 @@ fn parse_create_table_with_options() { r#"OPTIONS(partition_expiration_days = 1, description = "table option description")"# ); match bigquery().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, partition_by, cluster_by, options, .. - } => { + }) => { assert_eq!( name, ObjectName(vec!["mydataset".into(), "newtable".into()]) @@ -438,7 +444,10 @@ fn parse_create_table_with_options() { assert_eq!( ( Some(Box::new(Expr::Identifier(Ident::new("_PARTITIONDATE")))), - Some(vec![Ident::new("userid"), Ident::new("age"),]), + Some(WrappedCollection::NoWrapping(vec![ + Ident::new("userid"), + Ident::new("age"), + ])), Some(vec![ SqlOption { name: Ident::new("partition_expiration_days"), @@ -473,7 +482,7 @@ fn parse_create_table_with_options() { fn parse_nested_data_types() { let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; match bigquery_and_generic().one_statement_parses_to(sql, sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { assert_eq!(name, ObjectName(vec!["table".into()])); assert_eq!( columns, @@ -528,7 +537,7 @@ fn parse_invalid_brackets() { bigquery_and_generic() .parse_sql_statements(sql) .unwrap_err(), - ParserError::ParserError("Expected (, found: >".to_string()) + ParserError::ParserError("Expected: (, found: >".to_string()) ); let sql = "CREATE TABLE table (x STRUCT>>)"; @@ -537,7 +546,7 @@ fn parse_invalid_brackets() { .parse_sql_statements(sql) .unwrap_err(), ParserError::ParserError( - "Expected ',' or ')' after column definition, found: >".to_string() + "Expected: ',' or ')' after column definition, found: >".to_string() ) ); } @@ -1345,6 +1354,7 @@ fn parse_table_identifiers() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1517,6 +1527,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1543,7 +1554,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -1583,11 +1595,11 @@ fn parse_merge() { let update_action = MergeAction::Update { assignments: vec![ Assignment { - id: vec![Ident::new("a")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("a")])), value: Expr::Value(number("1")), }, Assignment { - id: vec![Ident::new("b")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("b")])), value: Expr::Value(number("2")), }, ], @@ -1612,6 +1624,7 @@ fn parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: false, }, table ); @@ -1626,6 +1639,7 @@ fn parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: 
false, }, source ); @@ -1746,11 +1760,11 @@ fn parse_merge_invalid_statements() { for (sql, err_msg) in [ ( "MERGE T USING U ON TRUE WHEN MATCHED BY TARGET AND 1 THEN DELETE", - "Expected THEN, found: BY", + "Expected: THEN, found: BY", ), ( "MERGE T USING U ON TRUE WHEN MATCHED BY SOURCE AND 1 THEN DELETE", - "Expected THEN, found: BY", + "Expected: THEN, found: BY", ), ( "MERGE T USING U ON TRUE WHEN NOT MATCHED BY SOURCE THEN INSERT(a) VALUES (b)", @@ -1891,13 +1905,13 @@ fn parse_big_query_declare() { let error_sql = "DECLARE x"; assert_eq!( - ParserError::ParserError("Expected a data type name, found: EOF".to_owned()), + ParserError::ParserError("Expected: a data type name, found: EOF".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE x 42"; assert_eq!( - ParserError::ParserError("Expected a data type name, found: 42".to_owned()), + ParserError::ParserError("Expected: a data type name, found: 42".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1950,6 +1964,145 @@ fn parse_map_access_expr() { bigquery().verified_only_select(sql); } +#[test] +fn test_bigquery_create_function() { + let sql = concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "project1.mydataset.myfunction(x FLOAT64) ", + "RETURNS FLOAT64 ", + "OPTIONS(x = 'y') ", + "AS 42" + ); + + let stmt = bigquery().verified_stmt(sql); + assert_eq!( + stmt, + Statement::CreateFunction { + or_replace: true, + temporary: true, + if_not_exists: false, + name: ObjectName(vec![ + Ident::new("project1"), + Ident::new("mydataset"), + Ident::new("myfunction"), + ]), + args: Some(vec![OperateFunctionArg::with_name("x", DataType::Float64),]), + return_type: Some(DataType::Float64), + function_body: Some(CreateFunctionBody::AsAfterOptions(Expr::Value(number( + "42" + )))), + options: Some(vec![SqlOption { + name: Ident::new("x"), + value: Expr::Value(Value::SingleQuotedString("y".into())), + }]), + behavior: None, + using: None, + language: 
None, + determinism_specifier: None, + remote_connection: None, + called_on_null: None, + parallel: None, + } + ); + + let sqls = [ + // Arbitrary Options expressions. + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS ARRAY ", + "OPTIONS(a = [1, 2], b = 'two', c = [('k1', 'v1'), ('k2', 'v2')]) ", + "AS ((SELECT 1 FROM mytable))" + ), + // Options after body. + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS ARRAY ", + "AS ((SELECT 1 FROM mytable)) ", + "OPTIONS(a = [1, 2], b = 'two', c = [('k1', 'v1'), ('k2', 'v2')])", + ), + // IF NOT EXISTS + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION IF NOT EXISTS ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS ARRAY ", + "OPTIONS(a = [1, 2]) ", + "AS ((SELECT 1 FROM mytable))" + ), + // No return type. + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "OPTIONS(a = [1, 2]) ", + "AS ((SELECT 1 FROM mytable))" + ), + // With language - body after options + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "DETERMINISTIC ", + "LANGUAGE js ", + "OPTIONS(a = [1, 2]) ", + "AS \"console.log('hello');\"" + ), + // With language - body before options + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "NOT DETERMINISTIC ", + "LANGUAGE js ", + "AS \"console.log('hello');\" ", + "OPTIONS(a = [1, 2])", + ), + // Remote + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS INT64 ", + "REMOTE WITH CONNECTION us.myconnection ", + "OPTIONS(a = [1, 2])", + ), + ]; + for sql in sqls { + bigquery().verified_stmt(sql); + } + + let error_sqls = [ + ( + concat!( + "CREATE TEMPORARY FUNCTION myfunction() ", + "OPTIONS(a = [1, 2]) ", + "AS ((SELECT 1 FROM mytable)) ", + "OPTIONS(a = [1, 2])", + ), + 
"Expected: end of statement, found: OPTIONS", + ), + ( + concat!( + "CREATE TEMPORARY FUNCTION myfunction() ", + "IMMUTABLE ", + "AS ((SELECT 1 FROM mytable)) ", + ), + "Expected: AS, found: IMMUTABLE", + ), + ( + concat!( + "CREATE TEMPORARY FUNCTION myfunction() ", + "AS \"console.log('hello');\" ", + "LANGUAGE js ", + ), + "Expected: end of statement, found: LANGUAGE", + ), + ]; + for (sql, error) in error_sqls { + assert_eq!( + ParserError::ParserError(error.to_owned()), + bigquery().parse_sql_statements(sql).unwrap_err() + ); + } +} + #[test] fn test_bigquery_trim() { let real_sql = r#"SELECT customer_id, TRIM(item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; @@ -1970,7 +2123,7 @@ fn test_bigquery_trim() { // missing comma separation let error_sql = "SELECT TRIM('xyz' 'a')"; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); } diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 7150a9489..8344ec83d 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -21,8 +21,8 @@ use test_utils::*; use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess}; use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Table; +use sqlparser::ast::Value::Number; use sqlparser::ast::*; - use sqlparser::dialect::ClickHouseDialect; use sqlparser::dialect::GenericDialect; @@ -59,10 +59,12 @@ fn parse_map_access_expr() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: Some(BinaryOp { left: Box::new(BinaryOp { left: Box::new(Identifier(Ident::new("id"))), @@ -88,7 +90,7 @@ fn parse_map_access_expr() { right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), }), }), - group_by: 
GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -161,6 +163,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -183,6 +186,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -211,13 +215,419 @@ fn parse_delimited_identifiers() { #[test] fn parse_create_table() { clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x")"#); - clickhouse().one_statement_parses_to( - r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x""#, - r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x")"#, - ); + clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x""#); clickhouse().verified_stmt( - r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x") AS SELECT * FROM "t" WHERE true"#, + r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x" AS SELECT * FROM "t" WHERE true"#, + ); +} + +fn column_def(name: Ident, data_type: DataType) -> ColumnDef { + ColumnDef { + name, + data_type, + collation: None, + options: vec![], + } +} + +#[test] +fn parse_clickhouse_data_types() { + let sql = concat!( + "CREATE TABLE table (", + "a1 UInt8, a2 UInt16, a3 UInt32, a4 UInt64, a5 UInt128, a6 UInt256,", + " b1 Int8, b2 Int16, b3 Int32, b4 Int64, b5 Int128, b6 Int256,", + " c1 Float32, c2 Float64,", + " d1 Date32, d2 DateTime64(3), d3 DateTime64(3, 'UTC'),", + " e1 FixedString(255),", + " f1 LowCardinality(Int32)", + ") ORDER BY (a1)", + ); + // ClickHouse has a case-sensitive definition of data type, but canonical representation is not + let canonical_sql = sql + 
.replace(" Int8", " INT8") + .replace(" Int64", " INT64") + .replace(" Float64", " FLOAT64"); + + match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) { + Statement::CreateTable(CreateTable { name, columns, .. }) => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + column_def("a1".into(), DataType::UInt8), + column_def("a2".into(), DataType::UInt16), + column_def("a3".into(), DataType::UInt32), + column_def("a4".into(), DataType::UInt64), + column_def("a5".into(), DataType::UInt128), + column_def("a6".into(), DataType::UInt256), + column_def("b1".into(), DataType::Int8(None)), + column_def("b2".into(), DataType::Int16), + column_def("b3".into(), DataType::Int32), + column_def("b4".into(), DataType::Int64), + column_def("b5".into(), DataType::Int128), + column_def("b6".into(), DataType::Int256), + column_def("c1".into(), DataType::Float32), + column_def("c2".into(), DataType::Float64), + column_def("d1".into(), DataType::Date32), + column_def("d2".into(), DataType::Datetime64(3, None)), + column_def("d3".into(), DataType::Datetime64(3, Some("UTC".into()))), + column_def("e1".into(), DataType::FixedString(255)), + column_def( + "f1".into(), + DataType::LowCardinality(Box::new(DataType::Int32)) + ), + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_nullable() { + let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE=MergeTree ORDER BY (`k`)"#; + // ClickHouse has a case-sensitive definition of data type, but canonical representation is not + let canonical_sql = sql.replace("String", "STRING"); + + match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + column_def("k".into(), DataType::UInt8), + column_def( + Ident::with_quote('`', "a"), + DataType::Nullable(Box::new(DataType::String(None))) + ), + column_def( + Ident::with_quote('`', "b"), + DataType::Nullable(Box::new(DataType::Datetime64( + 9, + Some("UTC".to_string()) + ))) + ), + column_def( + "c".into(), + DataType::Nullable(Box::new(DataType::Datetime64(9, None))) + ), + ColumnDef { + name: "d".into(), + data_type: DataType::Date32, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Null + }], + } + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_nested_data_types() { + let sql = concat!( + "CREATE TABLE table (", + " i Nested(a Array(Int16), b LowCardinality(String)),", + " k Array(Tuple(FixedString(128), Int128)),", + " l Tuple(a DateTime64(9), b Array(UUID)),", + " m Map(String, UInt16)", + ") ENGINE=MergeTree ORDER BY (k)" + ); + + match clickhouse().one_statement_parses_to(sql, "") { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("i"), + data_type: DataType::Nested(vec![ + column_def( + "a".into(), + DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new( + DataType::Int16 + ),)) + ), + column_def( + "b".into(), + DataType::LowCardinality(Box::new(DataType::String(None))) + ) + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("k"), + data_type: DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new( + DataType::Tuple(vec![ + StructField { + field_name: None, + field_type: DataType::FixedString(128) + }, + StructField { + field_name: None, + field_type: DataType::Int128 + } + ]) + ))), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("l"), + data_type: DataType::Tuple(vec![ + StructField { + field_name: Some("a".into()), + field_type: DataType::Datetime64(9, None), + }, + StructField { + field_name: Some("b".into()), + field_type: DataType::Array(ArrayElemTypeDef::Parenthesis( + Box::new(DataType::Uuid) + )) + }, + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("m"), + data_type: DataType::Map( + Box::new(DataType::String(None)), + Box::new(DataType::UInt16) + ), + collation: None, + options: vec![], + }, + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_primary_key() { + match clickhouse_and_generic().verified_stmt(concat!( + r#"CREATE TABLE db.table (`i` INT, `k` INT)"#, + " ENGINE=SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')", + " PRIMARY KEY tuple(i)", + " ORDER BY tuple(i)", + )) { + Statement::CreateTable(CreateTable { + name, + columns, + engine, + primary_key, + order_by, + .. 
+ }) => { + assert_eq!(name.to_string(), "db.table"); + assert_eq!( + vec![ + ColumnDef { + name: Ident::with_quote('`', "i"), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::with_quote('`', "k"), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + ], + columns + ); + assert_eq!( + engine, + Some(TableEngine { + name: "SharedMergeTree".to_string(), + parameters: Some(vec![ + Ident::with_quote('\'', "/clickhouse/tables/{uuid}/{shard}"), + Ident::with_quote('\'', "{replica}"), + ]), + }) + ); + fn assert_function(actual: &Function, name: &str, arg: &str) -> bool { + assert_eq!(actual.name, ObjectName(vec![Ident::new(name)])); + assert_eq!( + actual.args, + FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier( + Ident::new(arg) + )),)], + duplicate_treatment: None, + clauses: vec![], + }) + ); + true + } + match primary_key.unwrap().as_ref() { + Expr::Function(primary_key) => { + assert!(assert_function(primary_key, "tuple", "i")); + } + _ => panic!("unexpected primary key type"), + } + match order_by { + Some(OneOrManyWithParens::One(Expr::Function(order_by))) => { + assert!(assert_function(&order_by, "tuple", "i")); + } + _ => panic!("unexpected order by type"), + }; + } + _ => unreachable!(), + } + + clickhouse_and_generic() + .parse_sql_statements(concat!( + r#"CREATE TABLE db.table (`i` Int, `k` Int)"#, + " ORDER BY tuple(i), tuple(k)", + )) + .expect_err("ORDER BY supports one expression with tuple"); +} + +#[test] +fn parse_create_table_with_variant_default_expressions() { + let sql = concat!( + "CREATE TABLE table (", + "a DATETIME MATERIALIZED now(),", + " b DATETIME EPHEMERAL now(),", + " c DATETIME EPHEMERAL,", + " d STRING ALIAS toString(c)", + ") ENGINE=MergeTree" ); + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { columns, .. 
}) => { + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("a"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Materialized(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + }, + ColumnDef { + name: Ident::new("b"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(Some(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + }))) + }], + }, + ColumnDef { + name: Ident::new("c"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(None) + }], + }, + ColumnDef { + name: Ident::new("d"), + data_type: DataType::String(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Alias(Expr::Function(Function { + name: ObjectName(vec![Ident::new("toString")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Identifier(Ident::new("c")) + ))], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + } + ] + ) + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_view_with_fields_data_types() { + match clickhouse().verified_stmt(r#"CREATE 
VIEW v (i "int", f "String") AS SELECT * FROM t"#) { + Statement::CreateView { name, columns, .. } => { + assert_eq!(name, ObjectName(vec!["v".into()])); + assert_eq!( + columns, + vec![ + ViewColumnDef { + name: "i".into(), + data_type: Some(DataType::Custom( + ObjectName(vec![Ident { + value: "int".into(), + quote_style: Some('"') + }]), + vec![] + )), + options: None + }, + ViewColumnDef { + name: "f".into(), + data_type: Some(DataType::Custom( + ObjectName(vec![Ident { + value: "String".into(), + quote_style: Some('"') + }]), + vec![] + )), + options: None + }, + ] + ); + } + _ => unreachable!(), + } + + clickhouse() + .parse_sql_statements(r#"CREATE VIEW v (i, f) AS SELECT * FROM t"#) + .expect_err("CREATE VIEW with fields and without data types should be invalid"); } #[test] @@ -238,11 +648,96 @@ fn parse_limit_by() { ); } +#[test] +fn parse_settings_in_query() { + match clickhouse_and_generic() + .verified_stmt(r#"SELECT * FROM t SETTINGS max_threads = 1, max_block_size = 10000"#) + { + Statement::Query(query) => { + assert_eq!( + query.settings, + Some(vec![ + Setting { + key: Ident::new("max_threads"), + value: Number("1".parse().unwrap(), false) + }, + Setting { + key: Ident::new("max_block_size"), + value: Number("10000".parse().unwrap(), false) + }, + ]) + ); + } + _ => unreachable!(), + } + + let invalid_cases = vec![ + "SELECT * FROM t SETTINGS a", + "SELECT * FROM t SETTINGS a=", + "SELECT * FROM t SETTINGS a=1, b", + "SELECT * FROM t SETTINGS a=1, b=", + "SELECT * FROM t SETTINGS a=1, b=c", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: SETTINGS key = value, found: "); + } +} #[test] fn parse_select_star_except() { clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); } +#[test] +fn parse_select_parametric_function() { + match clickhouse_and_generic().verified_stmt("SELECT HISTOGRAM(0.5, 0.6)(x, y) FROM t") { + Statement::Query(query) => { + let projection: 
&Vec = query.body.as_select().unwrap().projection.as_ref(); + assert_eq!(projection.len(), 1); + match &projection[0] { + UnnamedExpr(Expr::Function(f)) => { + let args = match &f.args { + FunctionArguments::List(ref args) => args, + _ => unreachable!(), + }; + assert_eq!(args.args.len(), 2); + assert_eq!( + args.args[0], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier(Ident::from("x")))) + ); + assert_eq!( + args.args[1], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier(Ident::from("y")))) + ); + + let parameters = match f.parameters { + FunctionArguments::List(ref args) => args, + _ => unreachable!(), + }; + assert_eq!(parameters.args.len(), 2); + assert_eq!( + parameters.args[0], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(Value::Number( + "0.5".parse().unwrap(), + false + )))) + ); + assert_eq!( + parameters.args[1], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(Value::Number( + "0.6".parse().unwrap(), + false + )))) + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + #[test] fn parse_select_star_except_no_parens() { clickhouse().one_statement_parses_to( @@ -251,6 +746,383 @@ fn parse_select_star_except_no_parens() { ); } +#[test] +fn parse_create_materialized_view() { + // example sql + // https://clickhouse.com/docs/en/guides/developer/cascading-materialized-views + let sql = concat!( + "CREATE MATERIALIZED VIEW analytics.monthly_aggregated_data_mv ", + "TO analytics.monthly_aggregated_data ", + "AS SELECT toDate(toStartOfMonth(event_time)) ", + "AS month, domain_name, sumState(count_views) ", + "AS sumCountViews FROM analytics.hourly_data ", + "GROUP BY domain_name, month" + ); + clickhouse_and_generic().verified_stmt(sql); +} + +#[test] +fn parse_group_by_with_modifier() { + let clauses = ["x", "a, b", "ALL"]; + let modifiers = [ + "WITH ROLLUP", + "WITH CUBE", + "WITH TOTALS", + "WITH ROLLUP WITH CUBE", + ]; + let expected_modifiers = [ + vec![GroupByWithModifier::Rollup], + 
vec![GroupByWithModifier::Cube], + vec![GroupByWithModifier::Totals], + vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube], + ]; + for clause in &clauses { + for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) { + let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}"); + match clickhouse_and_generic().verified_stmt(&sql) { + Statement::Query(query) => { + let group_by = &query.body.as_select().unwrap().group_by; + if clause == &"ALL" { + assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec())); + } else { + assert_eq!( + group_by, + &GroupByExpr::Expressions( + clause + .split(", ") + .map(|c| Identifier(Ident::new(c))) + .collect(), + expected_modifier.to_vec() + ) + ); + } + } + _ => unreachable!(), + } + } + } + + // invalid cases + let invalid_cases = [ + "SELECT * FROM t GROUP BY x WITH", + "SELECT * FROM t GROUP BY x WITH ROLLUP CUBE", + "SELECT * FROM t GROUP BY x WITH WITH ROLLUP", + "SELECT * FROM t GROUP BY WITH ROLLUP", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH"); + } +} + +#[test] +fn parse_select_order_by_with_fill_interpolate() { + let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \ + ORDER BY \ + fname ASC NULLS FIRST WITH FILL FROM 10 TO 20 STEP 2, \ + lname DESC NULLS LAST WITH FILL FROM 30 TO 40 STEP 3 \ + INTERPOLATE (col1 AS col1 + 1) \ + LIMIT 2"; + let select = clickhouse().verified_query(sql); + assert_eq!( + OrderBy { + exprs: vec![ + OrderByExpr { + expr: Expr::Identifier(Ident::new("fname")), + asc: Some(true), + nulls_first: Some(true), + with_fill: Some(WithFill { + from: Some(Expr::Value(number("10"))), + to: Some(Expr::Value(number("20"))), + step: Some(Expr::Value(number("2"))), + }), + }, + OrderByExpr { + expr: Expr::Identifier(Ident::new("lname")), + asc: Some(false), + nulls_first: Some(false), + with_fill: Some(WithFill { + from: 
Some(Expr::Value(number("30"))), + to: Some(Expr::Value(number("40"))), + step: Some(Expr::Value(number("3"))), + }), + }, + ], + interpolate: Some(Interpolate { + exprs: Some(vec![InterpolateExpr { + column: Ident::new("col1"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1"))), + }), + }]) + }) + }, + select.order_by.expect("ORDER BY expected") + ); + assert_eq!(Some(Expr::Value(number("2"))), select.limit); +} + +#[test] +fn parse_select_order_by_with_fill_interpolate_multi_interpolates() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY fname WITH FILL \ + INTERPOLATE (col1 AS col1 + 1) INTERPOLATE (col2 AS col2 + 2)"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY only accepts a single INTERPOLATE clause"); +} + +#[test] +fn parse_select_order_by_with_fill_interpolate_multi_with_fill_interpolates() { + let sql = "SELECT id, fname, lname FROM customer \ + ORDER BY \ + fname WITH FILL INTERPOLATE (col1 AS col1 + 1), \ + lname WITH FILL INTERPOLATE (col2 AS col2 + 2)"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY only accepts a single INTERPOLATE clause"); +} + +#[test] +fn parse_select_order_by_interpolate_not_last() { + let sql = "SELECT id, fname, lname FROM customer \ + ORDER BY \ + fname INTERPOLATE (col2 AS col2 + 2), + lname"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY INTERPOLATE must be in the last position"); +} + +#[test] +fn parse_with_fill() { + let sql = "SELECT fname FROM customer ORDER BY fname \ + WITH FILL FROM 10 TO 20 STEP 2"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(WithFill { + from: Some(Expr::Value(number("10"))), + to: Some(Expr::Value(number("20"))), + step: Some(Expr::Value(number("2"))), + }), + select.order_by.expect("ORDER BY expected").exprs[0].with_fill + ); +} + +#[test] +fn 
parse_with_fill_missing_single_argument() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_with_fill_multiple_incomplete_arguments() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20, lname WITH FILL FROM TO STEP 1"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_interpolate_body_with_columns() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL \ + INTERPOLATE (col1 AS col1 + 1, col2 AS col3, col4 AS col4 + 4)"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { + exprs: Some(vec![ + InterpolateExpr { + column: Ident::new("col1"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1"))), + }), + }, + InterpolateExpr { + column: Ident::new("col2"), + expr: Some(Expr::Identifier(Ident::new("col3"))), + }, + InterpolateExpr { + column: Ident::new("col4"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col4"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("4"))), + }), + }, + ]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_without_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { exprs: None }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_with_empty_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE ()"; + let select = clickhouse().verified_query(sql); + assert_eq!( + 
Some(Interpolate { + exprs: Some(vec![]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn test_prewhere() { + match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 WHERE y = 2") { + Statement::Query(query) => { + let prewhere = query.body.as_select().unwrap().prewhere.as_ref(); + assert_eq!( + prewhere, + Some(&BinaryOp { + left: Box::new(Identifier(Ident::new("x"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("1".parse().unwrap(), false))), + }) + ); + let selection = query.as_ref().body.as_select().unwrap().selection.as_ref(); + assert_eq!( + selection, + Some(&BinaryOp { + left: Box::new(Identifier(Ident::new("y"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("2".parse().unwrap(), false))), + }) + ); + } + _ => unreachable!(), + } + + match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 AND y = 2") { + Statement::Query(query) => { + let prewhere = query.body.as_select().unwrap().prewhere.as_ref(); + assert_eq!( + prewhere, + Some(&BinaryOp { + left: Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("x"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("1".parse().unwrap(), false))), + }), + op: BinaryOperator::And, + right: Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("y"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("2".parse().unwrap(), false))), + }), + }) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_query_with_format_clause() { + let format_options = vec!["TabSeparated", "JSONCompact", "NULL"]; + for format in &format_options { + let sql = format!("SELECT * FROM t FORMAT {}", format); + match clickhouse_and_generic().verified_stmt(&sql) { + Statement::Query(query) => { + if *format == "NULL" { + assert_eq!(query.format_clause, Some(FormatClause::Null)); + } else { + assert_eq!( + query.format_clause, + 
Some(FormatClause::Identifier(Ident::new(*format))) + ); + } + } + _ => unreachable!(), + } + } + + let invalid_cases = [ + "SELECT * FROM t FORMAT", + "SELECT * FROM t FORMAT TabSeparated JSONCompact", + "SELECT * FROM t FORMAT TabSeparated TabSeparated", + ]; + for sql in &invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: FORMAT {identifier}, found: "); + } +} + +#[test] +fn parse_create_table_on_commit_and_as_query() { + let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#; + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + on_commit, + query, + .. + }) => { + assert_eq!(name.to_string(), "test"); + assert_eq!(on_commit, Some(OnCommit::PreserveRows)); + assert_eq!( + query.unwrap().body.as_select().unwrap().projection, + vec![UnnamedExpr(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + )))] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_select_table_function_settings() { + let sql = r#"SELECT * FROM table_function(arg, SETTINGS setting = 3)"#; + match clickhouse_and_generic().verified_stmt(sql) { + Statement::Query(q) => { + let from = &q.body.as_select().unwrap().from; + assert_eq!(from.len(), 1); + assert_eq!(from[0].joins, vec![]); + match &from[0].relation { + Table { args, .. 
} => { + let args = args.as_ref().unwrap(); + assert_eq!( + args.args, + vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier("arg".into()) + ))] + ); + assert_eq!( + args.settings, + Some(vec![Setting { + key: "setting".into(), + value: Value::Number("3".into(), false) + }]) + ) + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6668ce8f4..dd3ed0515 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -42,6 +42,7 @@ mod test_utils; #[cfg(test)] use pretty_assertions::assert_eq; +use sqlparser::ast::Expr::Identifier; use sqlparser::test_utils::all_dialects_except; #[test] @@ -115,7 +116,7 @@ fn parse_replace_into() { let sql = "REPLACE INTO public.customer (id, name, active) VALUES (1, 2, 3)"; assert_eq!( - ParserError::ParserError("Unsupported statement REPLACE at Line: 1, Column 9".to_string()), + ParserError::ParserError("Unsupported statement REPLACE at Line: 1, Column: 9".to_string()), Parser::parse_sql(&dialect, sql,).unwrap_err(), ) } @@ -199,7 +200,7 @@ fn parse_insert_default_values() { let insert_with_columns_and_default_values = "INSERT INTO test_table (test_col) DEFAULT VALUES"; assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: DEFAULT".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: DEFAULT".to_string() ), parse_sql_statements(insert_with_columns_and_default_values).unwrap_err() ); @@ -207,20 +208,20 @@ fn parse_insert_default_values() { let insert_with_default_values_and_hive_after_columns = "INSERT INTO test_table DEFAULT VALUES (some_column)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), 
parse_sql_statements(insert_with_default_values_and_hive_after_columns).unwrap_err() ); let insert_with_default_values_and_hive_partition = "INSERT INTO test_table DEFAULT VALUES PARTITION (some_column)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: PARTITION".to_string()), + ParserError::ParserError("Expected: end of statement, found: PARTITION".to_string()), parse_sql_statements(insert_with_default_values_and_hive_partition).unwrap_err() ); let insert_with_default_values_and_values_list = "INSERT INTO test_table DEFAULT VALUES (1)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), parse_sql_statements(insert_with_default_values_and_values_list).unwrap_err() ); } @@ -296,15 +297,15 @@ fn parse_update() { assignments, vec![ Assignment { - id: vec!["a".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["a".into()])), value: Expr::Value(number("1")), }, Assignment { - id: vec!["b".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["b".into()])), value: Expr::Value(number("2")), }, Assignment { - id: vec!["c".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["c".into()])), value: Expr::Value(number("3")), }, ] @@ -319,14 +320,14 @@ fn parse_update() { let sql = "UPDATE t WHERE 1"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected SET, found: WHERE".to_string()), + ParserError::ParserError("Expected: SET, found: WHERE".to_string()), res.unwrap_err() ); let sql = "UPDATE t SET a = 1 extrabadstuff"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: extrabadstuff".to_string()), + ParserError::ParserError("Expected: end of statement, found: extrabadstuff".to_string()), res.unwrap_err() ); } @@ -359,11 +360,12 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: 
vec![], + with_ordinality: false, }, joins: vec![], }, assignments: vec![Assignment { - id: vec![Ident::new("name")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("name")])), value: Expr::CompoundIdentifier(vec![Ident::new("t2"), Ident::new("name")]) }], from: Some(TableWithJoins { @@ -387,14 +389,17 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![Expr::Identifier(Ident::new( - "id" - ))]), + group_by: GroupByExpr::Expressions( + vec![Expr::Identifier(Ident::new("id"))], + vec![] + ), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -405,13 +410,15 @@ fn parse_update_set_from() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }), alias: Some(TableAlias { name: Ident::new("t2"), @@ -459,6 +466,7 @@ fn parse_update_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -466,7 +474,10 @@ fn parse_update_with_table_alias() { ); assert_eq!( vec![Assignment { - id: vec![Ident::new("u"), Ident::new("username")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("u"), + Ident::new("username") + ])), value: Expr::Value(Value::SingleQuotedString("new_user".to_string())), }], assignments @@ -523,6 +534,7 @@ fn parse_select_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }] @@ -559,6 +571,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -574,7 +587,7 @@ fn parse_delete_without_from_error() { let dialects = all_dialects_except(|d| d.is::() || 
d.is::()); let res = dialects.parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected FROM, found: WHERE".to_string()), + ParserError::ParserError("Expected: FROM, found: WHERE".to_string()), res.unwrap_err() ); } @@ -605,6 +618,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -616,6 +630,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].joins[0].relation ); @@ -641,6 +656,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -652,6 +668,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[1].relation ); @@ -663,6 +680,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].relation ); @@ -674,6 +692,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].joins[0].relation ); @@ -704,6 +723,7 @@ fn parse_where_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -748,6 +768,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -763,6 +784,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }]), @@ -889,7 +911,7 @@ fn parse_select_distinct_on() { fn parse_select_distinct_missing_paren() { let result = parse_sql_statements("SELECT DISTINCT (name, id FROM 
customer"); assert_eq!( - ParserError::ParserError("Expected ), found: FROM".to_string()), + ParserError::ParserError("Expected: ), found: FROM".to_string()), result.unwrap_err(), ); } @@ -933,7 +955,7 @@ fn parse_select_into() { let sql = "SELECT * INTO table0 asdf FROM table1"; let result = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: asdf".to_string()), + ParserError::ParserError("Expected: end of statement, found: asdf".to_string()), result.unwrap_err() ) } @@ -970,7 +992,7 @@ fn parse_select_wildcard() { let sql = "SELECT * + * FROM foo;"; let result = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: +".to_string()), + ParserError::ParserError("Expected: end of statement, found: +".to_string()), result.unwrap_err(), ); } @@ -999,7 +1021,7 @@ fn parse_column_aliases() { assert_eq!(&Expr::Value(number("1")), right.as_ref()); assert_eq!(&Ident::new("newname"), alias); } else { - panic!("Expected ExprWithAlias") + panic!("Expected: ExprWithAlias") } // alias without AS is parsed correctly: @@ -1010,13 +1032,13 @@ fn parse_column_aliases() { fn test_eof_after_as() { let res = parse_sql_statements("SELECT foo AS"); assert_eq!( - ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()), + ParserError::ParserError("Expected: an identifier after AS, found: EOF".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT 1 FROM foo AS"); assert_eq!( - ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()), + ParserError::ParserError("Expected: an identifier after AS, found: EOF".to_string()), res.unwrap_err() ); } @@ -1042,6 +1064,7 @@ fn parse_select_count_wildcard() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: 
vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], @@ -1063,6 +1086,7 @@ fn parse_select_count_distinct() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { @@ -1101,7 +1125,7 @@ fn parse_not() { fn parse_invalid_infix_not() { let res = parse_sql_statements("SELECT c FROM t WHERE c NOT ("); assert_eq!( - ParserError::ParserError("Expected end of statement, found: NOT".to_string()), + ParserError::ParserError("Expected: end of statement, found: NOT".to_string()), res.unwrap_err(), ); } @@ -1174,11 +1198,11 @@ fn parse_exponent_in_select() -> Result<(), ParserError> { let select = match select.pop().unwrap() { Statement::Query(inner) => *inner, - _ => panic!("Expected Query"), + _ => panic!("Expected: Query"), }; let select = match *select.body { SetExpr::Select(inner) => *inner, - _ => panic!("Expected SetExpr::Select"), + _ => panic!("Expected: SetExpr::Select"), }; assert_eq!( @@ -1807,7 +1831,7 @@ fn parse_in_error() { let sql = "SELECT * FROM customers WHERE segment in segment"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected (, found: segment".to_string()), + ParserError::ParserError("Expected: (, found: segment".to_string()), res.unwrap_err() ); } @@ -2020,14 +2044,14 @@ fn parse_tuple_invalid() { let sql = "select (1"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected ), found: EOF".to_string()), + ParserError::ParserError("Expected: ), found: EOF".to_string()), res.unwrap_err() ); let sql = "select (), 2"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_string()), + ParserError::ParserError("Expected: an expression:, found: )".to_string()), res.unwrap_err() ); } @@ 
-2042,19 +2066,22 @@ fn parse_select_order_by() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("id")), asc: None, nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); } chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); @@ -2074,14 +2101,16 @@ fn parse_select_order_by_limit() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2097,14 +2126,16 @@ fn parse_select_order_by_nulls_order() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: Some(true), + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: Some(false), + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expeccted").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2114,10 +2145,13 @@ fn parse_select_group_by() { let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname"; let select = verified_only_select(sql); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("lname")), - Expr::Identifier(Ident::new("fname")), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("lname")), + Expr::Identifier(Ident::new("fname")), + ], + vec![] + ), select.group_by ); @@ -2132,7 +2166,7 @@ fn parse_select_group_by() { fn parse_select_group_by_all() { let sql = "SELECT id, fname, lname, SUM(order) FROM 
customer GROUP BY ALL"; let select = verified_only_select(sql); - assert_eq!(GroupByExpr::All, select.group_by); + assert_eq!(GroupByExpr::All(vec![]), select.group_by); one_statement_parses_to( "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL", @@ -2148,6 +2182,7 @@ fn parse_select_having() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], @@ -2177,6 +2212,7 @@ fn parse_select_qualify() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -2191,6 +2227,7 @@ fn parse_select_qualify() { expr: Expr::Identifier(Ident::new("o")), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -2439,7 +2476,7 @@ fn parse_extract() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT EXTRACT(JIFFY FROM d)"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2478,7 +2515,7 @@ fn parse_ceil_datetime() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT CEIL(d TO JIFFY) FROM df"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2505,7 +2542,7 @@ fn parse_floor_datetime() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT FLOOR(d TO 
JIFFY) FROM df"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2520,6 +2557,7 @@ fn parse_listagg() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("LISTAGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), args: vec![ @@ -2550,6 +2588,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident { @@ -2558,6 +2597,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, ] }), @@ -2706,7 +2746,7 @@ fn parse_window_function_null_treatment_arg() { let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1"; assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected end of statement, found: NULLS".to_string()) + ParserError::ParserError("Expected: end of statement, found: NULLS".to_string()) ); let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1"; @@ -2714,7 +2754,7 @@ fn parse_window_function_null_treatment_arg() { all_dialects_where(|d| !d.supports_window_function_null_treatment_arg()) .parse_sql_statements(sql) .unwrap_err(), - ParserError::ParserError("Expected ), found: IGNORE".to_string()) + ParserError::ParserError("Expected: ), found: IGNORE".to_string()) ); } @@ -2747,7 +2787,7 @@ fn parse_create_table() { FOREIGN KEY (lng) REFERENCES othertable4(longitude) ON UPDATE SET NULL)", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -2757,7 +2797,7 @@ fn parse_create_table() { file_format: None, location: None, .. 
- } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -2904,13 +2944,13 @@ fn parse_create_table() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: GARBAGE")); + .contains("Expected: \',\' or \')\' after column definition, found: GARBAGE")); let res = parse_sql_statements("CREATE TABLE t (a int NOT NULL CONSTRAINT foo)"); assert!(res .unwrap_err() .to_string() - .contains("Expected constraint details after CONSTRAINT ")); + .contains("Expected: constraint details after CONSTRAINT ")); } #[test] @@ -2936,7 +2976,7 @@ fn parse_create_table_with_constraint_characteristics() { FOREIGN KEY (lng) REFERENCES othertable4(longitude) ON UPDATE SET NULL NOT DEFERRABLE INITIALLY IMMEDIATE ENFORCED)", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -2946,7 +2986,7 @@ fn parse_create_table_with_constraint_characteristics() { file_format: None, location: None, .. 
- } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -3049,7 +3089,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: NOT")); + .contains("Expected: \',\' or \')\' after column definition, found: NOT")); let res = parse_sql_statements("CREATE TABLE t ( a int NOT NULL, @@ -3058,7 +3098,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: ENFORCED")); + .contains("Expected: \',\' or \')\' after column definition, found: ENFORCED")); let res = parse_sql_statements("CREATE TABLE t ( a int NOT NULL, @@ -3067,7 +3107,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: INITIALLY")); + .contains("Expected: \',\' or \')\' after column definition, found: INITIALLY")); } #[test] @@ -3104,7 +3144,7 @@ fn parse_create_table_column_constraint_characteristics() { }; match ast { - Statement::CreateTable { columns, .. } => { + Statement::CreateTable(CreateTable { columns, .. }) => { assert_eq!( columns, vec![ColumnDef { @@ -3158,7 +3198,7 @@ fn parse_create_table_column_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected one of DEFERRED or IMMEDIATE, found: BADVALUE")); + .contains("Expected: one of DEFERRED or IMMEDIATE, found: BADVALUE")); let res = parse_sql_statements( "CREATE TABLE t (a int NOT NULL UNIQUE INITIALLY IMMEDIATE DEFERRABLE INITIALLY DEFERRED)", @@ -3214,12 +3254,12 @@ fn parse_create_table_hive_array() { }; match dialects.one_statement_parses_to(sql.as_str(), sql.as_str()) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { if_not_exists, name, columns, .. 
- } => { + }) => { assert!(if_not_exists); assert_eq!(name, ObjectName(vec!["something".into()])); assert_eq!( @@ -3257,7 +3297,7 @@ fn parse_create_table_hive_array() { assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected >, found: )".to_string()) + ParserError::ParserError("Expected: >, found: )".to_string()) ); } @@ -3373,7 +3413,7 @@ fn parse_create_table_as() { let sql = "CREATE TABLE t AS SELECT * FROM a"; match verified_stmt(sql) { - Statement::CreateTable { name, query, .. } => { + Statement::CreateTable(CreateTable { name, query, .. }) => { assert_eq!(name.to_string(), "t".to_string()); assert_eq!(query, Some(Box::new(verified_query("SELECT * FROM a")))); } @@ -3385,7 +3425,7 @@ fn parse_create_table_as() { // (without data types) in a CTAS, but we have yet to support that. let sql = "CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a"; match verified_stmt(sql) { - Statement::CreateTable { columns, query, .. } => { + Statement::CreateTable(CreateTable { columns, query, .. }) => { assert_eq!(columns.len(), 2); assert_eq!(columns[0].to_string(), "a INT".to_string()); assert_eq!(columns[1].to_string(), "b INT".to_string()); @@ -3408,17 +3448,19 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }); match verified_stmt(sql1) { - Statement::CreateTable { query, name, .. } => { + Statement::CreateTable(CreateTable { query, name, .. 
}) => { assert_eq!(name, ObjectName(vec![Ident::new("new_table")])); assert_eq!(query.unwrap(), expected_query1); } @@ -3433,17 +3475,19 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: Some("schema_name".to_string()), }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }); match verified_stmt(sql2) { - Statement::CreateTable { query, name, .. } => { + Statement::CreateTable(CreateTable { query, name, .. }) => { assert_eq!(name, ObjectName(vec![Ident::new("new_table")])); assert_eq!(query.unwrap(), expected_query2); } @@ -3453,10 +3497,15 @@ fn parse_create_table_as_table() { #[test] fn parse_create_table_on_cluster() { + let generic = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + // Using single-quote literal to define current cluster let sql = "CREATE TABLE t ON CLUSTER '{cluster}' (a INT, b INT)"; - match verified_stmt(sql) { - Statement::CreateTable { on_cluster, .. } => { + match generic.verified_stmt(sql) { + Statement::CreateTable(CreateTable { on_cluster, .. }) => { assert_eq!(on_cluster.unwrap(), "{cluster}".to_string()); } _ => unreachable!(), @@ -3464,8 +3513,8 @@ fn parse_create_table_on_cluster() { // Using explicitly declared cluster name let sql = "CREATE TABLE t ON CLUSTER my_cluster (a INT, b INT)"; - match verified_stmt(sql) { - Statement::CreateTable { on_cluster, .. } => { + match generic.verified_stmt(sql) { + Statement::CreateTable(CreateTable { on_cluster, .. }) => { assert_eq!(on_cluster.unwrap(), "my_cluster".to_string()); } _ => unreachable!(), @@ -3477,9 +3526,9 @@ fn parse_create_or_replace_table() { let sql = "CREATE OR REPLACE TABLE t (a INT)"; match verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, or_replace, .. 
- } => { + }) => { assert_eq!(name.to_string(), "t".to_string()); assert!(or_replace); } @@ -3488,7 +3537,7 @@ fn parse_create_or_replace_table() { let sql = "CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a"; match verified_stmt(sql) { - Statement::CreateTable { columns, query, .. } => { + Statement::CreateTable(CreateTable { columns, query, .. }) => { assert_eq!(columns.len(), 2); assert_eq!(columns[0].to_string(), "a INT".to_string()); assert_eq!(columns[1].to_string(), "b INT".to_string()); @@ -3517,9 +3566,14 @@ fn parse_create_table_with_on_delete_on_update_2in_any_order() -> Result<(), Par #[test] fn parse_create_table_with_options() { + let generic = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; - match verified_stmt(sql) { - Statement::CreateTable { with_options, .. } => { + match generic.verified_stmt(sql) { + Statement::CreateTable(CreateTable { with_options, .. }) => { assert_eq!( vec![ SqlOption { @@ -3542,7 +3596,7 @@ fn parse_create_table_with_options() { fn parse_create_table_clone() { let sql = "CREATE OR REPLACE TABLE a CLONE a_tmp"; match verified_stmt(sql) { - Statement::CreateTable { name, clone, .. } => { + Statement::CreateTable(CreateTable { name, clone, .. 
}) => { assert_eq!(ObjectName(vec![Ident::new("a")]), name); assert_eq!(Some(ObjectName(vec![(Ident::new("a_tmp"))])), clone) } @@ -3552,8 +3606,13 @@ fn parse_create_table_clone() { #[test] fn parse_create_table_trailing_comma() { - let sql = "CREATE TABLE foo (bar int,)"; - all_dialects().one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); + let dialect = TestedDialects { + dialects: vec![Box::new(DuckDbDialect {})], + options: None, + }; + + let sql = "CREATE TABLE foo (bar int,);"; + dialect.one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); } #[test] @@ -3572,7 +3631,7 @@ fn parse_create_external_table() { STORED AS TEXTFILE LOCATION '/tmp/example.csv'", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -3582,7 +3641,7 @@ fn parse_create_external_table() { file_format, location, .. - } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -3643,7 +3702,7 @@ fn parse_create_or_replace_external_table() { STORED AS TEXTFILE LOCATION '/tmp/example.csv'", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -3654,7 +3713,7 @@ fn parse_create_or_replace_external_table() { location, or_replace, .. - } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -3700,7 +3759,7 @@ fn parse_create_external_table_lowercase() { lng DOUBLE) \ STORED AS PARQUET LOCATION '/tmp/example.csv'", ); - assert_matches!(ast, Statement::CreateTable { .. }); + assert_matches!(ast, Statement::CreateTable(CreateTable { .. 
})); } #[test] @@ -4017,7 +4076,7 @@ fn parse_alter_table_alter_column_type() { let res = dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); assert_eq!( - ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()), + ParserError::ParserError("Expected: SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()), res.unwrap_err() ); @@ -4025,7 +4084,7 @@ fn parse_alter_table_alter_column_type() { "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" )); assert_eq!( - ParserError::ParserError("Expected end of statement, found: USING".to_string()), + ParserError::ParserError("Expected: end of statement, found: USING".to_string()), res.unwrap_err() ); } @@ -4064,7 +4123,7 @@ fn parse_alter_table_drop_constraint() { let res = parse_sql_statements(&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT")); assert_eq!( - ParserError::ParserError("Expected end of statement, found: TEXT".to_string()), + ParserError::ParserError("Expected: end of statement, found: TEXT".to_string()), res.unwrap_err() ); } @@ -4073,14 +4132,14 @@ fn parse_alter_table_drop_constraint() { fn parse_bad_constraint() { let res = parse_sql_statements("ALTER TABLE tab ADD"); assert_eq!( - ParserError::ParserError("Expected identifier, found: EOF".to_string()), + ParserError::ParserError("Expected: identifier, found: EOF".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CREATE TABLE tab (foo int,"); assert_eq!( ParserError::ParserError( - "Expected column name or constraint definition, found: EOF".to_string() + "Expected: column name or constraint definition, found: EOF".to_string() ), res.unwrap_err() ); @@ -4209,6 +4268,7 @@ fn parse_named_argument_function() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: 
FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ @@ -4247,6 +4307,7 @@ fn parse_named_argument_function_with_eq_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ @@ -4319,6 +4380,7 @@ fn parse_window_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -4333,6 +4395,7 @@ fn parse_window_functions() { expr: Expr::Identifier(Ident::new("dt")), asc: Some(false), nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -4418,11 +4481,11 @@ fn parse_window_clause() { ORDER BY C3"; verified_only_select(sql); - let sql = "SELECT from mytable WINDOW window1 AS window2"; + let sql = "SELECT * from mytable WINDOW window1 AS window2"; let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected (, found: window2".to_string()), + ParserError::ParserError("Expected: (, found: window2".to_string()), res.unwrap_err() ); } @@ -4447,6 +4510,7 @@ fn test_parse_named_window() { value: "MIN".to_string(), quote_style: None, }]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -4476,6 +4540,7 @@ fn test_parse_named_window() { value: "MAX".to_string(), quote_style: None, }]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -4512,12 +4577,14 @@ fn test_parse_named_window() { with_hints: vec![], version: None, partitions: vec![], + 
with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -4538,6 +4605,7 @@ fn test_parse_named_window() { }), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, }), @@ -4833,13 +4901,13 @@ fn parse_interval() { let result = parse_sql_statements("SELECT INTERVAL '1' SECOND TO SECOND"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: SECOND".to_string()), + ParserError::ParserError("Expected: end of statement, found: SECOND".to_string()), result.unwrap_err(), ); let result = parse_sql_statements("SELECT INTERVAL '10' HOUR (1) TO HOUR (2)"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), result.unwrap_err(), ); @@ -4893,10 +4961,12 @@ fn parse_interval_and_or_xor() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: Some(Expr::BinaryOp { left: Box::new(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident { @@ -4946,7 +5016,7 @@ fn parse_interval_and_or_xor() { }), }), }), - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -4957,13 +5027,15 @@ fn parse_interval_and_or_xor() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }))]; assert_eq!(actual_ast, expected_ast); @@ -4995,7 +5067,9 @@ fn parse_at_timezone() { assert_eq!( &Expr::AtTimeZone { timestamp: Box::new(call("FROM_UNIXTIME", [zero.clone()])), - 
time_zone: "UTC-06:00".to_string(), + time_zone: Box::new(Expr::Value(Value::SingleQuotedString( + "UTC-06:00".to_string() + ))), }, expr_from_projection(only(&select.projection)), ); @@ -5009,7 +5083,9 @@ fn parse_at_timezone() { [ Expr::AtTimeZone { timestamp: Box::new(call("FROM_UNIXTIME", [zero])), - time_zone: "UTC-06:00".to_string(), + time_zone: Box::new(Expr::Value(Value::SingleQuotedString( + "UTC-06:00".to_string() + ))), }, Expr::Value(Value::SingleQuotedString("%Y-%m-%dT%H".to_string()),) ] @@ -5176,13 +5252,13 @@ fn parse_table_function() { let res = parse_sql_statements("SELECT * FROM TABLE '1' AS a"); assert_eq!( - ParserError::ParserError("Expected (, found: \'1\'".to_string()), + ParserError::ParserError("Expected: (, found: \'1\'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT * FROM TABLE (FUN(a) AS a"); assert_eq!( - ParserError::ParserError("Expected ), found: AS".to_string()), + ParserError::ParserError("Expected: ), found: AS".to_string()), res.unwrap_err() ); } @@ -5239,6 +5315,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5256,6 +5333,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5273,6 +5351,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5293,6 +5372,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5317,6 +5397,7 @@ fn parse_unnest_in_from_clause() { )], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: 
vec![], }], @@ -5347,6 +5428,7 @@ fn parse_unnest_in_from_clause() { ], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5456,6 +5538,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5467,6 +5550,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5486,6 +5570,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5495,6 +5580,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5507,6 +5593,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5516,6 +5603,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5538,6 +5626,7 @@ fn parse_cross_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::CrossJoin, }, @@ -5560,6 +5649,7 @@ fn parse_joins_on() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -5631,6 +5721,7 @@ fn parse_joins_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -5694,6 +5785,7 @@ fn parse_natural_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Natural), } 
@@ -5730,7 +5822,7 @@ fn parse_natural_join() { let sql = "SELECT * FROM t1 natural"; assert_eq!( - ParserError::ParserError("Expected a join type after NATURAL, found: EOF".to_string()), + ParserError::ParserError("Expected: a join type after NATURAL, found: EOF".to_string()), parse_sql_statements(sql).unwrap_err(), ); } @@ -5811,7 +5903,7 @@ fn parse_join_syntax_variants() { let res = parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1"); assert_eq!( - ParserError::ParserError("Expected APPLY, found: JOIN".to_string()), + ParserError::ParserError("Expected: APPLY, found: JOIN".to_string()), res.unwrap_err() ); } @@ -5849,7 +5941,7 @@ fn parse_ctes() { Expr::Subquery(ref subquery) => { assert_ctes_in_select(&cte_sqls, subquery.as_ref()); } - _ => panic!("Expected subquery"), + _ => panic!("Expected: subquery"), } // CTE in a derived table let sql = &format!("SELECT * FROM ({with})"); @@ -5858,13 +5950,13 @@ fn parse_ctes() { TableFactor::Derived { subquery, .. } => { assert_ctes_in_select(&cte_sqls, subquery.as_ref()) } - _ => panic!("Expected derived table"), + _ => panic!("Expected: derived table"), } // CTE in a view let sql = &format!("CREATE VIEW v AS {with}"); match verified_stmt(sql) { Statement::CreateView { query, .. } => assert_ctes_in_select(&cte_sqls, &query), - _ => panic!("Expected CREATE VIEW"), + _ => panic!("Expected: CREATE VIEW"), } // CTE in a CTE... 
let sql = &format!("WITH outer_cte AS ({with}) SELECT * FROM outer_cte"); @@ -5961,6 +6053,7 @@ fn parse_derived_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5991,6 +6084,12 @@ fn parse_union_except_intersect() { verified_stmt("SELECT foo FROM tab UNION SELECT bar FROM TAB"); verified_stmt("(SELECT * FROM new EXCEPT SELECT * FROM old) UNION ALL (SELECT * FROM old EXCEPT SELECT * FROM new) ORDER BY 1"); verified_stmt("(SELECT * FROM new EXCEPT DISTINCT SELECT * FROM old) UNION DISTINCT (SELECT * FROM old EXCEPT DISTINCT SELECT * FROM new) ORDER BY 1"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); } #[test] @@ -6019,7 +6118,7 @@ fn parse_multiple_statements() { // Check that forgetting the semicolon results in an error: let res = parse_sql_statements(&(sql1.to_owned() + " " + sql2_kw + sql2_rest)); assert_eq!( - ParserError::ParserError("Expected end of statement, found: ".to_string() + sql2_kw), + ParserError::ParserError("Expected: end of statement, found: ".to_string() + sql2_kw), res.unwrap_err() ); } @@ -6074,7 +6173,7 @@ fn parse_overlay() { "SELECT OVERLAY('abccccde' PLACING 'abc' FROM 3 FOR 12)", ); assert_eq!( - ParserError::ParserError("Expected PLACING, found: FROM".to_owned()), + ParserError::ParserError("Expected: PLACING, found: FROM".to_owned()), parse_sql_statements("SELECT OVERLAY('abccccde' FROM 3)").unwrap_err(), ); @@ -6123,7 +6222,7 @@ fn parse_trim() { ); assert_eq!( - 
ParserError::ParserError("Expected ), found: 'xyz'".to_owned()), + ParserError::ParserError("Expected: ), found: 'xyz'".to_owned()), parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); @@ -6145,7 +6244,7 @@ fn parse_trim() { options: None, }; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), all_expected_snowflake .parse_sql_statements("SELECT TRIM('xyz', 'a')") .unwrap_err() @@ -6182,7 +6281,7 @@ fn parse_exists_subquery() { .parse_sql_statements("SELECT EXISTS ("); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: EOF".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: EOF".to_string() ), res.unwrap_err(), ); @@ -6191,7 +6290,7 @@ fn parse_exists_subquery() { .parse_sql_statements("SELECT EXISTS (NULL)"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: NULL".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: NULL".to_string() ), res.unwrap_err(), ); @@ -6247,9 +6346,11 @@ fn parse_create_view() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6258,9 +6359,11 @@ fn parse_create_view() { assert!(!or_replace); assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6301,9 +6404,11 @@ fn parse_create_view_with_columns() { query, materialized, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!( @@ -6312,6 +6417,7 @@ fn parse_create_view_with_columns() 
{ .into_iter() .map(|name| ViewColumnDef { name, + data_type: None, options: None }) .collect::>() @@ -6321,9 +6427,11 @@ fn parse_create_view_with_columns() { assert!(!materialized); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6341,9 +6449,11 @@ fn parse_create_view_temporary() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6352,9 +6462,11 @@ fn parse_create_view_temporary() { assert!(!or_replace); assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6372,9 +6484,11 @@ fn parse_create_or_replace_view() { query, materialized, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -6383,9 +6497,11 @@ fn parse_create_or_replace_view() { assert!(!materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6407,9 +6523,11 @@ fn parse_create_or_replace_materialized_view() { query, materialized, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -6418,9 +6536,11 @@ fn parse_create_or_replace_materialized_view() { assert!(materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + 
assert!(to.is_none()) } _ => unreachable!(), } @@ -6438,9 +6558,11 @@ fn parse_create_materialized_view() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6449,9 +6571,11 @@ fn parse_create_materialized_view() { assert_eq!(options, CreateTableOptions::None); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6469,9 +6593,11 @@ fn parse_create_materialized_view_with_cluster_by() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6480,9 +6606,11 @@ fn parse_create_materialized_view_with_cluster_by() { assert_eq!(options, CreateTableOptions::None); assert!(!or_replace); assert_eq!(cluster_by, vec![Ident::new("foo")]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6538,7 +6666,7 @@ fn parse_drop_table() { let sql = "DROP TABLE"; assert_eq!( - ParserError::ParserError("Expected identifier, found: EOF".to_string()), + ParserError::ParserError("Expected: identifier, found: EOF".to_string()), parse_sql_statements(sql).unwrap_err(), ); @@ -6570,7 +6698,7 @@ fn parse_drop_view() { fn parse_invalid_subquery_without_parens() { let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: 1".to_string()), + ParserError::ParserError("Expected: end of statement, found: 1".to_string()), res.unwrap_err() ); } @@ -6783,7 +6911,7 @@ fn lateral_derived() { let sql = "SELECT * FROM LATERAL UNNEST ([10,20,30]) as 
numbers WITH OFFSET;"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: WITH".to_string()), + ParserError::ParserError("Expected: end of statement, found: WITH".to_string()), res.unwrap_err() ); @@ -6791,7 +6919,7 @@ fn lateral_derived() { let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: b".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: b".to_string() ), res.unwrap_err() ); @@ -6823,6 +6951,7 @@ fn lateral_function() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Function { @@ -6840,8 +6969,9 @@ fn lateral_function() { }], }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -6909,19 +7039,19 @@ fn parse_start_transaction() { let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD"); assert_eq!( - ParserError::ParserError("Expected isolation level, found: BAD".to_string()), + ParserError::ParserError("Expected: isolation level, found: BAD".to_string()), res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION BAD"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: BAD".to_string()), + ParserError::ParserError("Expected: end of statement, found: BAD".to_string()), res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION READ ONLY,"); assert_eq!( - ParserError::ParserError("Expected transaction mode, found: EOF".to_string()), + ParserError::ParserError("Expected: transaction mode, found: EOF".to_string()), res.unwrap_err() ); } @@ -7006,9 +7136,39 @@ fn parse_set_variable() { _ => unreachable!(), } + // Subquery expression + for (sql, canonical) in [ + ( + "SET (a) = 
(SELECT 22 FROM tbl1)", + "SET (a) = ((SELECT 22 FROM tbl1))", + ), + ( + "SET (a) = (SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), SELECT 33 FROM tbl3)", + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), (SELECT 33 FROM tbl3))", + ), + ] { + multi_variable_dialects.one_statement_parses_to(sql, canonical); + } + let error_sqls = [ - ("SET (a, b, c) = (1, 2, 3", "Expected ), found: EOF"), - ("SET (a, b, c) = 1, 2, 3", "Expected (, found: 1"), + ("SET (a, b, c) = (1, 2, 3", "Expected: ), found: EOF"), + ("SET (a, b, c) = 1, 2, 3", "Expected: (, found: 1"), + ( + "SET (a) = ((SELECT 22 FROM tbl1)", + "Expected: ), found: EOF", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1) (SELECT 22 FROM tbl1))", + "Expected: ), found: (", + ), ]; for (sql, error) in error_sqls { assert_eq!( @@ -7037,7 +7197,9 @@ fn parse_double_colon_cast_at_timezone() { data_type: DataType::Timestamp(None, TimezoneInfo::None), format: None }), - time_zone: "Europe/Brussels".to_string() + time_zone: Box::new(Expr::Value(Value::SingleQuotedString( + "Europe/Brussels".to_string() + ))), }, expr_from_projection(only(&select.projection)), ); @@ -7181,22 +7343,24 @@ fn parse_create_index() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(name), table_name, columns, unique, if_not_exists, .. 
- } => { + }) => { assert_eq!("idx_name", name.to_string()); assert_eq!("test", table_name.to_string()); assert_eq!(indexed_columns, columns); @@ -7215,15 +7379,17 @@ fn test_create_index_with_using_function() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(name), table_name, using, @@ -7234,7 +7400,7 @@ fn test_create_index_with_using_function() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq!("idx_name", name.to_string()); assert_eq!("test", table_name.to_string()); assert_eq!("btree", using.unwrap().to_string()); @@ -7528,6 +7694,7 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } ); assert_eq!(table, table_no_into); @@ -7553,12 +7720,14 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -7569,13 +7738,15 @@ fn parse_merge() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }), alias: Some(TableAlias { name: Ident { @@ -7660,14 +7831,20 @@ fn parse_merge() { action: MergeAction::Update { assignments: vec![ Assignment { - id: vec![Ident::new("dest"), Ident::new("F")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("dest"), + Ident::new("F") + ])), value: Expr::CompoundIdentifier(vec![ Ident::new("stg"), Ident::new("F"), ]), }, 
Assignment { - id: vec![Ident::new("dest"), Ident::new("G")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("dest"), + Ident::new("G") + ])), value: Expr::CompoundIdentifier(vec![ Ident::new("stg"), Ident::new("G"), @@ -8000,19 +8177,19 @@ fn parse_offset_and_limit() { // Can't repeat OFFSET / LIMIT let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 OFFSET 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: OFFSET".to_string()), + ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT foo FROM bar LIMIT 2 LIMIT 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: LIMIT".to_string()), + ParserError::ParserError("Expected: end of statement, found: LIMIT".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 LIMIT 2 OFFSET 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: OFFSET".to_string()), + ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), res.unwrap_err() ); } @@ -8024,6 +8201,7 @@ fn parse_time_functions() { let select = verified_only_select(&sql); let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -8081,7 +8259,7 @@ fn parse_position_negative() { let sql = "SELECT POSITION(foo IN) from bar"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_string()), + ParserError::ParserError("Expected: an expression:, found: )".to_string()), res.unwrap_err() ); } @@ -8139,7 +8317,7 @@ fn parse_is_boolean() { let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" + 
"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" .to_string() ), res.unwrap_err() @@ -8332,7 +8510,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE TABLE 'table_name' foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8340,7 +8518,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE flag TABLE 'table_name' OPTIONS('K1'='V1') foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8348,7 +8526,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE TABLE 'table_name' AS foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8356,26 +8534,26 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE flag TABLE 'table_name' OPTIONS('K1'='V1') AS foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); let res = parse_sql_statements("CACHE 'table_name'"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CACHE 'table_name' OPTIONS('K1'='V1')"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: OPTIONS".to_string()), + 
ParserError::ParserError("Expected: a `TABLE` keyword, found: OPTIONS".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CACHE flag 'table_name' OPTIONS('K1'='V1')"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); } @@ -8400,19 +8578,19 @@ fn parse_uncache_table() { let res = parse_sql_statements("UNCACHE TABLE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected an `EOF`, found: foo".to_string()), + ParserError::ParserError("Expected: end of statement, found: foo".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: TABLE, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE IF EXISTS 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: IF".to_string()), + ParserError::ParserError("Expected: TABLE, found: IF".to_string()), res.unwrap_err() ); } @@ -8605,6 +8783,7 @@ fn parse_pivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), aggregate_functions: vec![ expected_function("a", None), @@ -8612,7 +8791,7 @@ fn parse_pivot_table() { expected_function("c", Some("u")), ], value_column: vec![Ident::new("a"), Ident::new("MONTH")], - pivot_values: vec![ + value_source: PivotValueSource::List(vec![ ExprWithAlias { expr: Expr::Value(number("1")), alias: Some(Ident::new("x")) @@ -8625,7 +8804,8 @@ fn parse_pivot_table() { expr: Expr::Identifier(Ident::new("three")), alias: Some(Ident::new("y")) }, - ], + ]), + default_on_null: None, alias: Some(TableAlias { name: Ident { value: "p".to_string(), @@ -8673,6 +8853,7 @@ fn parse_unpivot_table() { with_hints: vec![], version: 
None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "quantity".to_string(), @@ -8739,6 +8920,7 @@ fn parse_pivot_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "population".to_string(), @@ -8763,7 +8945,7 @@ fn parse_pivot_unpivot_table() { alias: None }], value_column: vec![Ident::new("year")], - pivot_values: vec![ + value_source: PivotValueSource::List(vec![ ExprWithAlias { expr: Expr::Value(Value::SingleQuotedString("population_2000".to_string())), alias: None @@ -8772,7 +8954,8 @@ fn parse_pivot_unpivot_table() { expr: Expr::Value(Value::SingleQuotedString("population_2010".to_string())), alias: None }, - ], + ]), + default_on_null: None, alias: Some(TableAlias { name: Ident::new("p"), columns: vec![] @@ -8823,9 +9006,11 @@ fn parse_non_latin_identifiers() { #[test] fn parse_trailing_comma() { + // At the moment, DuckDB is the only dialect that allows + // trailing commas anywhere in the query let trailing_commas = TestedDialects { - dialects: vec![Box::new(GenericDialect {})], - options: Some(ParserOptions::new().with_trailing_commas(true)), + dialects: vec![Box::new(DuckDbDialect {})], + options: None, }; trailing_commas.one_statement_parses_to( @@ -8843,11 +9028,91 @@ fn parse_trailing_comma() { "SELECT DISTINCT ON (album_id) name FROM track", ); + trailing_commas.one_statement_parses_to( + "CREATE TABLE employees (name text, age int,)", + "CREATE TABLE employees (name TEXT, age INT)", + ); + + trailing_commas.one_statement_parses_to( + "GRANT USAGE, SELECT, INSERT, ON p TO u", + "GRANT USAGE, SELECT, INSERT ON p TO u", + ); + + trailing_commas.verified_stmt("SELECT album_id, name FROM track"); + trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); + trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track"); + + // check quoted "from" identifier edge-case + trailing_commas.one_statement_parses_to( + r#"SELECT 
"from", FROM "from""#, + r#"SELECT "from" FROM "from""#, + ); + trailing_commas.verified_stmt(r#"SELECT "from" FROM "from""#); + + // doesn't allow any trailing commas + let trailing_commas = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + + assert_eq!( + trailing_commas + .parse_sql_statements("SELECT name, age, from employees;") + .unwrap_err(), + ParserError::ParserError("Expected an expression, found: from".to_string()) + ); + + assert_eq!( + trailing_commas + .parse_sql_statements("REVOKE USAGE, SELECT, ON p TO u") + .unwrap_err(), + ParserError::ParserError("Expected: a privilege keyword, found: ON".to_string()) + ); + + assert_eq!( + trailing_commas + .parse_sql_statements("CREATE TABLE employees (name text, age int,)") + .unwrap_err(), + ParserError::ParserError( + "Expected: column name or constraint definition, found: )".to_string() + ) + ); +} + +#[test] +fn parse_projection_trailing_comma() { + // Some dialects allow trailing commas only in the projection + let trailing_commas = TestedDialects { + dialects: vec![Box::new(SnowflakeDialect {}), Box::new(BigQueryDialect {})], + options: None, + }; + + trailing_commas.one_statement_parses_to( + "SELECT album_id, name, FROM track", + "SELECT album_id, name FROM track", + ); + trailing_commas.verified_stmt("SELECT album_id, name FROM track"); trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track"); + + assert_eq!( + trailing_commas + .parse_sql_statements("SELECT * FROM track ORDER BY milliseconds,") + .unwrap_err(), + ParserError::ParserError("Expected: an expression:, found: EOF".to_string()) + ); + + assert_eq!( + trailing_commas + .parse_sql_statements("CREATE TABLE employees (name text, age int,)") + .unwrap_err(), + ParserError::ParserError( + "Expected: column name or constraint definition, found: )".to_string() + ), + ); } #[test] @@ -8885,6 +9150,7 @@ fn 
parse_call() { assert_eq!( verified_stmt("CALL my_procedure('a')"), Statement::Call(Function { + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( @@ -8984,12 +9250,14 @@ fn parse_unload() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9007,7 +9275,9 @@ fn parse_unload() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, + settings: None, + format_clause: None, }), to: Ident { value: "s3://...".to_string(), @@ -9126,13 +9396,15 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], into: None, lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9210,17 +9482,19 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], into: None, lateral_views: vec![], + prewhere: None, selection: Some(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("employee_id"))), op: BinaryOperator::NotEq, right: Box::new(Expr::Value(number("42"))), }), - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9286,6 +9560,7 @@ fn test_selective_aggregation() { vec![ SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + parameters: FunctionArguments::None, args: 
FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -9303,6 +9578,7 @@ fn test_selective_aggregation() { SelectItem::ExprWithAlias { expr: Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -9338,15 +9614,18 @@ fn test_group_by_grouping_sets() { all_dialects_where(|d| d.supports_group_by_expr()) .verified_only_select(sql) .group_by, - GroupByExpr::Expressions(vec![Expr::GroupingSets(vec![ - vec![ - Expr::Identifier(Ident::new("city")), - Expr::Identifier(Ident::new("car_model")) - ], - vec![Expr::Identifier(Ident::new("city")),], - vec![Expr::Identifier(Ident::new("car_model"))], + GroupByExpr::Expressions( + vec![Expr::GroupingSets(vec![ + vec![ + Expr::Identifier(Ident::new("city")), + Expr::Identifier(Ident::new("car_model")) + ], + vec![Expr::Identifier(Ident::new("city")),], + vec![Expr::Identifier(Ident::new("car_model"))], + vec![] + ])], vec![] - ])]) + ) ); } @@ -9363,6 +9642,7 @@ fn test_match_recognize() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }; fn check(options: &str, expect: TableFactor) { @@ -9394,6 +9674,7 @@ fn test_match_recognize() { expr: Expr::Identifier(Ident::new("price_date")), asc: None, nulls_first: None, + with_fill: None, }], measures: vec![ Measure { @@ -9797,6 +10078,101 @@ fn test_dictionary_syntax() { ) } +#[test] +fn test_map_syntax() { + fn check(sql: &str, expect: Expr) { + assert_eq!( + all_dialects_where(|d| d.support_map_literal_syntax()).verified_expr(sql), + expect + ); + } + + check( + "MAP {'Alberta': 'Edmonton', 'Manitoba': 'Winnipeg'}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("Alberta".to_owned()))), + value: 
Box::new(Expr::Value(Value::SingleQuotedString( + "Edmonton".to_owned(), + ))), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString( + "Manitoba".to_owned(), + ))), + value: Box::new(Expr::Value(Value::SingleQuotedString( + "Winnipeg".to_owned(), + ))), + }, + ], + }), + ); + + fn number_expr(s: &str) -> Expr { + Expr::Value(number(s)) + } + + check( + "MAP {1: 10.0, 2: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(number_expr("1")), + value: Box::new(number_expr("10.0")), + }, + MapEntry { + key: Box::new(number_expr("2")), + value: Box::new(number_expr("20.0")), + }, + ], + }), + ); + + check( + "MAP {[1, 2, 3]: 10.0, [4, 5, 6]: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("1"), number_expr("2"), number_expr("3")], + named: false, + })), + value: Box::new(Expr::Value(number("10.0"))), + }, + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("4"), number_expr("5"), number_expr("6")], + named: false, + })), + value: Box::new(Expr::Value(number("20.0"))), + }, + ], + }), + ); + + check( + "MAP {'a': 10, 'b': 20}['a']", + Expr::Subscript { + expr: Box::new(Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), + value: Box::new(number_expr("10")), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), + value: Box::new(number_expr("20")), + }, + ], + })), + subscript: Box::new(Subscript::Index { + index: Expr::Value(Value::SingleQuotedString("a".to_owned())), + }), + }, + ); +} + #[test] fn parse_within_group() { verified_expr("PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sales_amount)"); @@ -9844,14 +10220,14 @@ fn tests_select_values_without_parens_and_set_op() { assert_eq!(SetOperator::Union, op); match *left { SetExpr::Select(_) => {} - _ => panic!("Expected a SELECT statement"), + _ => panic!("Expected: a SELECT statement"), } 
match *right { SetExpr::Select(_) => {} - _ => panic!("Expected a SELECT statement"), + _ => panic!("Expected: a SELECT statement"), } } - _ => panic!("Expected a SET OPERATION"), + _ => panic!("Expected: a SET OPERATION"), } } @@ -9885,6 +10261,48 @@ fn parse_select_wildcard_with_except() { .parse_sql_statements("SELECT * EXCEPT () FROM employee_table") .unwrap_err() .to_string(), - "sql parser error: Expected identifier, found: )" + "sql parser error: Expected: identifier, found: )" ); } + +#[test] +fn parse_auto_increment_too_large() { + let dialect = GenericDialect {}; + let u64_max = u64::MAX; + let sql = + format!("CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) AUTO_INCREMENT=1{u64_max}"); + + let res = Parser::new(&dialect) + .try_with_sql(&sql) + .expect("tokenize to work") + .parse_statements(); + + assert!(res.is_err(), "{res:?}"); +} + +#[test] +fn test_group_by_nothing() { + let Select { group_by, .. } = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT count(1) FROM t GROUP BY ()"); + { + std::assert_eq!( + GroupByExpr::Expressions(vec![Expr::Tuple(vec![])], vec![]), + group_by + ); + } + + let Select { group_by, .. 
} = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT name, count(1) FROM t GROUP BY name, ()"); + { + std::assert_eq!( + GroupByExpr::Expressions( + vec![ + Identifier(Ident::new("name".to_string())), + Expr::Tuple(vec![]) + ], + vec![] + ), + group_by + ); + } +} diff --git a/tests/sqlparser_custom_dialect.rs b/tests/sqlparser_custom_dialect.rs index 516591382..5b29047a4 100644 --- a/tests/sqlparser_custom_dialect.rs +++ b/tests/sqlparser_custom_dialect.rs @@ -125,6 +125,28 @@ fn custom_statement_parser() -> Result<(), ParserError> { Ok(()) } +#[test] +fn test_map_syntax_not_support_default() -> Result<(), ParserError> { + #[derive(Debug)] + struct MyDialect {} + + impl Dialect for MyDialect { + fn is_identifier_start(&self, ch: char) -> bool { + is_identifier_start(ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + is_identifier_part(ch) + } + } + + let dialect = MyDialect {}; + let sql = "SELECT MAP {1: 2}"; + let ast = Parser::parse_sql(&dialect, sql); + assert!(ast.is_err()); + Ok(()) +} + fn is_identifier_start(ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 8f0579fc9..280b97b49 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -1,5 +1,5 @@ use sqlparser::ast::*; -use sqlparser::dialect::DatabricksDialect; +use sqlparser::dialect::{DatabricksDialect, GenericDialect}; use sqlparser::parser::ParserError; use test_utils::*; @@ -13,6 +13,13 @@ fn databricks() -> TestedDialects { } } +fn databricks_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(DatabricksDialect {}), Box::new(GenericDialect {})], + options: None, + } +} + #[test] fn test_databricks_identifiers() { // databricks uses backtick for delimited identifiers @@ -57,7 +64,7 @@ fn test_databricks_exists() { let res = databricks().parse_sql_statements("SELECT EXISTS ("); assert_eq!( 
// TODO: improve this error message... - ParserError::ParserError("Expected an expression:, found: EOF".to_string()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_string()), res.unwrap_err(), ); } @@ -124,3 +131,61 @@ fn test_databricks_lambdas() { ); databricks().verified_expr("transform(array(1, 2, 3), x -> x + 1)"); } + +#[test] +fn test_values_clause() { + let values = Values { + explicit_row: false, + rows: vec![ + vec![ + Expr::Value(Value::DoubleQuotedString("one".to_owned())), + Expr::Value(number("1")), + ], + vec![ + Expr::Value(Value::SingleQuotedString("two".to_owned())), + Expr::Value(number("2")), + ], + ], + }; + + let query = databricks().verified_query(r#"VALUES ("one", 1), ('two', 2)"#); + assert_eq!(SetExpr::Values(values.clone()), *query.body); + + // VALUES is permitted in a FROM clause without a subquery + let query = databricks().verified_query_with_canonical( + r#"SELECT * FROM VALUES ("one", 1), ('two', 2)"#, + r#"SELECT * FROM (VALUES ("one", 1), ('two', 2))"#, + ); + let Some(TableFactor::Derived { subquery, .. 
}) = query + .body + .as_select() + .map(|select| &select.from[0].relation) + else { + panic!("expected subquery"); + }; + assert_eq!(SetExpr::Values(values), *subquery.body); + + // values is also a valid table name + let query = databricks_and_generic().verified_query(concat!( + "WITH values AS (SELECT 42) ", + "SELECT * FROM values", + )); + assert_eq!( + Some(&TableFactor::Table { + name: ObjectName(vec![Ident::new("values")]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + }), + query + .body + .as_select() + .map(|select| &select.from[0].relation) + ); + + // TODO: support this example from https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-values.html#examples + // databricks().verified_query("VALUES 1, 2, 3"); +} diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a84da5378..0e61b86c9 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -166,12 +166,14 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -204,12 +206,14 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -488,6 +492,7 @@ fn test_duckdb_named_argument_function_with_assignment_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: 
FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ @@ -528,8 +533,8 @@ fn test_array_index() { _ => panic!("Expected an expression with alias"), }; assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Array(Array { + &Expr::Subscript { + expr: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_owned())), Expr::Value(Value::SingleQuotedString("b".to_owned())), @@ -537,8 +542,105 @@ fn test_array_index() { ], named: false })), - indexes: vec![Expr::Value(number("3"))] + subscript: Box::new(Subscript::Index { + index: Expr::Value(number("3")) + }) }, expr ); } + +#[test] +fn test_duckdb_union_datatype() { + let sql = "CREATE TABLE tbl1 (one UNION(a INT), two UNION(a INT, b INT), nested UNION(a UNION(b INT)))"; + let stmt = duckdb_and_generic().verified_stmt(sql); + assert_eq!( + Statement::CreateTable(CreateTable { + or_replace: Default::default(), + temporary: Default::default(), + external: Default::default(), + global: Default::default(), + if_not_exists: Default::default(), + transient: Default::default(), + volatile: Default::default(), + name: ObjectName(vec!["tbl1".into()]), + columns: vec![ + ColumnDef { + name: "one".into(), + data_type: DataType::Union(vec![UnionField { + field_name: "a".into(), + field_type: DataType::Int(None) + }]), + collation: Default::default(), + options: Default::default() + }, + ColumnDef { + name: "two".into(), + data_type: DataType::Union(vec![ + UnionField { + field_name: "a".into(), + field_type: DataType::Int(None) + }, + UnionField { + field_name: "b".into(), + field_type: DataType::Int(None) + } + ]), + collation: Default::default(), + options: Default::default() + }, + ColumnDef { + name: "nested".into(), + data_type: DataType::Union(vec![UnionField { + field_name: "a".into(), + field_type: DataType::Union(vec![UnionField { + field_name: "b".into(), + field_type: DataType::Int(None) + }]) + }]), + collation: Default::default(), + options: 
Default::default() + } + ], + constraints: Default::default(), + hive_distribution: HiveDistributionStyle::NONE, + hive_formats: Some(HiveFormat { + row_format: Default::default(), + serde_properties: Default::default(), + storage: Default::default(), + location: Default::default() + }), + table_properties: Default::default(), + with_options: Default::default(), + file_format: Default::default(), + location: Default::default(), + query: Default::default(), + without_rowid: Default::default(), + like: Default::default(), + clone: Default::default(), + engine: Default::default(), + comment: Default::default(), + auto_increment_offset: Default::default(), + default_charset: Default::default(), + collation: Default::default(), + on_commit: Default::default(), + on_cluster: Default::default(), + primary_key: Default::default(), + order_by: Default::default(), + partition_by: Default::default(), + cluster_by: Default::default(), + options: Default::default(), + strict: Default::default(), + copy_grants: Default::default(), + enable_schema_evolution: Default::default(), + change_tracking: Default::default(), + data_retention_time_in_days: Default::default(), + max_data_extension_time_in_days: Default::default(), + default_ddl_collation: Default::default(), + with_aggregation_policy: Default::default(), + with_row_access_policy: Default::default(), + with_tags: Default::default() + }), + stmt + ); +} diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 78db48ec2..5f0b9f575 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -17,8 +17,8 @@ use sqlparser::ast::{ CreateFunctionBody, CreateFunctionUsing, Expr, Function, FunctionArgumentList, - FunctionArguments, FunctionDefinition, Ident, ObjectName, OneOrManyWithParens, SelectItem, - Statement, TableFactor, UnaryOperator, + FunctionArguments, Ident, ObjectName, OneOrManyWithParens, SelectItem, Statement, TableFactor, + UnaryOperator, Value, }; use sqlparser::dialect::{GenericDialect, 
HiveDialect, MsSqlDialect}; use sqlparser::parser::{ParserError, ParserOptions}; @@ -284,7 +284,7 @@ fn set_statement_with_minus() { assert_eq!( hive().parse_sql_statements("SET hive.tez.java.opts = -"), Err(ParserError::ParserError( - "Expected variable value, found: EOF".to_string() + "Expected: variable value, found: EOF".to_string() )) ) } @@ -296,22 +296,23 @@ fn parse_create_function() { Statement::CreateFunction { temporary, name, - params, + function_body, + using, .. } => { assert!(temporary); assert_eq!(name.to_string(), "mydb.myfunc"); assert_eq!( - params, - CreateFunctionBody { - as_: Some(FunctionDefinition::SingleQuotedDef( - "org.random.class.Name".to_string() - )), - using: Some(CreateFunctionUsing::Jar( - "hdfs://somewhere.com:8020/very/far".to_string() - )), - ..Default::default() - } + function_body, + Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + Value::SingleQuotedString("org.random.class.Name".to_string()) + ))) + ); + assert_eq!( + using, + Some(CreateFunctionUsing::Jar( + "hdfs://somewhere.com:8020/very/far".to_string() + )), ) } _ => unreachable!(), @@ -326,14 +327,14 @@ fn parse_create_function() { assert_eq!( unsupported_dialects.parse_sql_statements(sql).unwrap_err(), ParserError::ParserError( - "Expected an object type after CREATE, found: FUNCTION".to_string() + "Expected: an object type after CREATE, found: FUNCTION".to_string() ) ); let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'org.random.class.Name' USING JAR"; assert_eq!( hive().parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected literal string, found: EOF".to_string()), + ParserError::ParserError("Expected: literal string, found: EOF".to_string()), ); } @@ -358,6 +359,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -380,6 +382,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: 
ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -397,7 +400,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } hive().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 5d61c6ab9..3e8b6afbf 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -64,6 +64,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -102,7 +103,9 @@ fn parse_create_procedure() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, + settings: None, + format_clause: None, body: Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, @@ -110,8 +113,9 @@ fn parse_create_procedure() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -332,6 +336,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -354,6 +359,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -437,6 +443,12 @@ fn parse_for_json_expect_ast() { ); } +#[test] +fn parse_ampersand_arobase() { + // In SQL Server, a&@b means (a) & (@b), in 
PostgreSQL it means (a) &@ (b) + ms().expr_parses_to("a&@b", "a & @b"); +} + #[test] fn parse_cast_varchar_max() { ms_and_generic().verified_expr("CAST('foo' AS VARCHAR(MAX))"); @@ -475,7 +487,7 @@ fn parse_convert() { let error_sql = "SELECT CONVERT(INT, 'foo',) FROM T"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_owned()), + ParserError::ParserError("Expected: an expression:, found: )".to_owned()), ms().parse_sql_statements(error_sql).unwrap_err() ); } @@ -516,12 +528,14 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -532,13 +546,15 @@ fn parse_substring_in_select() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }), query ); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 1e185915b..b0b29f347 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -470,7 +470,7 @@ fn parse_set_variables() { fn parse_create_table_auto_increment() { let sql = "CREATE TABLE foo (bar INT PRIMARY KEY AUTO_INCREMENT)"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -541,12 +541,12 @@ fn parse_create_table_primary_and_unique_key() { for (sql, index_type_display) in sqls.iter().zip(index_type_display) { match mysql().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, .. 
- } => { + }) => { assert_eq!(name.to_string(), "foo"); let expected_constraint = table_constraint_unique_primary_ctor( @@ -609,9 +609,9 @@ fn parse_create_table_primary_and_unique_key_with_index_options() { for (sql, index_type_display) in sqls.iter().zip(index_type_display) { match mysql_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, constraints, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); let expected_constraint = table_constraint_unique_primary_ctor( @@ -647,9 +647,9 @@ fn parse_create_table_primary_and_unique_key_with_index_type() { for (sql, index_type_display) in sqls.iter().zip(index_type_display) { match mysql_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, constraints, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); let expected_constraint = table_constraint_unique_primary_ctor( @@ -690,7 +690,7 @@ fn parse_create_table_comment() { for sql in [canonical, with_equal] { match mysql().one_statement_parses_to(sql, canonical) { - Statement::CreateTable { name, comment, .. } => { + Statement::CreateTable(CreateTable { name, comment, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!(comment.expect("Should exist").to_string(), "baz"); } @@ -708,11 +708,11 @@ fn parse_create_table_auto_increment_offset() { for sql in [canonical, with_equal] { match mysql().one_statement_parses_to(sql, canonical) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, auto_increment_offset, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( auto_increment_offset.expect("Should exist").to_string(), @@ -728,7 +728,7 @@ fn parse_create_table_auto_increment_offset() { fn parse_create_table_set_enum() { let sql = "CREATE TABLE foo (bar SET('a', 'b'), baz ENUM('a', 'b'))"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. 
} => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ @@ -756,13 +756,13 @@ fn parse_create_table_set_enum() { fn parse_create_table_engine_default_charset() { let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3"; match mysql().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, engine, default_charset, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -773,7 +773,13 @@ fn parse_create_table_engine_default_charset() { },], columns ); - assert_eq!(engine, Some("InnoDB".to_string())); + assert_eq!( + engine, + Some(TableEngine { + name: "InnoDB".to_string(), + parameters: None + }) + ); assert_eq!(default_charset, Some("utf8mb3".to_string())); } _ => unreachable!(), @@ -784,12 +790,12 @@ fn parse_create_table_engine_default_charset() { fn parse_create_table_collate() { let sql = "CREATE TABLE foo (id INT(11)) COLLATE=utf8mb4_0900_ai_ci"; match mysql().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, collation, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -806,11 +812,38 @@ fn parse_create_table_collate() { } } +#[test] +fn parse_create_table_both_options_and_as_query() { + let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb4_0900_ai_ci AS SELECT 1"; + match mysql_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + collation, + query, + .. 
+ }) => { + assert_eq!(name.to_string(), "foo"); + assert_eq!(collation, Some("utf8mb4_0900_ai_ci".to_string())); + assert_eq!( + query.unwrap().body.as_select().unwrap().projection, + vec![SelectItem::UnnamedExpr(Expr::Value(number("1")))] + ); + } + _ => unreachable!(), + } + + let sql = r"CREATE TABLE foo (id INT(11)) ENGINE=InnoDB AS SELECT 1 DEFAULT CHARSET=utf8mb3"; + assert!(matches!( + mysql_and_generic().parse_sql_statements(sql), + Err(ParserError::ParserError(_)) + )); +} + #[test] fn parse_create_table_comment_character_set() { let sql = "CREATE TABLE foo (s TEXT CHARACTER SET utf8mb4 COMMENT 'comment')"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -857,7 +890,7 @@ fn parse_create_table_gencol() { fn parse_quote_identifiers() { let sql = "CREATE TABLE `PRIMARY` (`BEGIN` INT PRIMARY KEY)"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { assert_eq!(name.to_string(), "`PRIMARY`"); assert_eq!( vec![ColumnDef { @@ -900,8 +933,9 @@ fn parse_escaped_quote_identifiers_with_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -912,13 +946,15 @@ fn parse_escaped_quote_identifiers_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })) ); } @@ -947,8 +983,9 @@ fn parse_escaped_quote_identifiers_with_no_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -959,13 +996,15 @@ fn parse_escaped_quote_identifiers_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })) ); } @@ -991,8 +1030,9 @@ fn parse_escaped_backticks_with_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1003,13 +1043,15 @@ fn parse_escaped_backticks_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })) ); } @@ -1035,8 +1077,9 @@ fn parse_escaped_backticks_with_no_escape() { into: None, 
from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1047,13 +1090,15 @@ fn parse_escaped_backticks_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })) ); } @@ -1126,7 +1171,7 @@ fn check_roundtrip_of_escaped_string() { fn parse_create_table_with_minimum_display_width() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3), bar_smallint SMALLINT(5), bar_mediumint MEDIUMINT(6), bar_int INT(11), bar_bigint BIGINT(20))"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ @@ -1172,7 +1217,7 @@ fn parse_create_table_with_minimum_display_width() { fn parse_create_table_unsigned() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3) UNSIGNED, bar_smallint SMALLINT(5) UNSIGNED, bar_mediumint MEDIUMINT(13) UNSIGNED, bar_int INT(11) UNSIGNED, bar_bigint BIGINT(20) UNSIGNED)"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ @@ -1251,13 +1296,15 @@ fn parse_simple_insert() { ] ] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1293,13 +1340,15 @@ fn parse_ignore_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1335,13 +1384,15 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1374,13 +1425,15 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1421,13 +1474,15 @@ fn parse_insert_as() { "2024-01-01".to_string() ))]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1480,13 +1535,15 @@ fn parse_insert_as() { Expr::Value(Value::SingleQuotedString("2024-01-01".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1523,13 +1580,15 @@ fn parse_replace_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1560,13 +1619,15 @@ fn 
parse_empty_row_insert() { explicit_row: false, rows: vec![vec![], vec![]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); @@ -1620,36 +1681,48 @@ fn parse_insert_with_on_duplicate_update() { Expr::Value(Value::Boolean(true)), ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), source ); assert_eq!( Some(OnInsert::DuplicateKeyUpdate(vec![ Assignment { - id: vec![Ident::new("description".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "description".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("description"))]), }, Assignment { - id: vec![Ident::new("perm_create".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_create".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_create"))]), }, Assignment { - id: vec![Ident::new("perm_read".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_read".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_read"))]), }, Assignment { - id: vec![Ident::new("perm_update".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_update".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_update"))]), }, Assignment { - id: vec![Ident::new("perm_delete".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_delete".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_delete"))]), }, ])), @@ -1682,12 +1755,14 @@ fn parse_select_with_numeric_prefix_column_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], 
lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1735,12 +1810,14 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1799,6 +1876,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -1811,6 +1889,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -1829,7 +1908,10 @@ fn parse_update_with_joins() { ); assert_eq!( vec![Assignment { - id: vec![Ident::new("o"), Ident::new("completed")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("o"), + Ident::new("completed") + ])), value: Expr::Value(Value::Boolean(true)) }], assignments @@ -1864,6 +1946,7 @@ fn parse_delete_with_order_by() { }), asc: Some(false), nulls_first: None, + with_fill: None, }], order_by ); @@ -2231,12 +2314,14 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2247,13 +2332,15 @@ fn parse_substring_in_select() { value_table_mode: None, connect_by: None, 
}))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, }), query ); @@ -2321,7 +2408,7 @@ fn parse_kill() { fn parse_table_colum_option_on_update() { let sql1 = "CREATE TABLE foo (`modification_time` DATETIME ON UPDATE CURRENT_TIMESTAMP())"; match mysql().verified_stmt(sql1) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -2499,7 +2586,7 @@ fn parse_fulltext_expression() { } #[test] -#[should_panic = "Expected FULLTEXT or SPATIAL option without constraint name, found: cons"] +#[should_panic = "Expected: FULLTEXT or SPATIAL option without constraint name, found: cons"] fn parse_create_table_with_fulltext_definition_should_not_accept_constraint_name() { mysql_and_generic().verified_stmt("CREATE TABLE tb (c1 INT, CONSTRAINT cons FULLTEXT (c1))"); } @@ -2539,8 +2626,9 @@ fn parse_hex_string_introducer() { })], from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2552,13 +2640,15 @@ fn parse_hex_string_introducer() { into: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })) ) } @@ -2622,7 +2712,7 @@ fn parse_create_table_with_column_collate() { let sql = "CREATE TABLE tb (id TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci)"; let canonical = "CREATE TABLE tb (id TEXT COLLATE utf8mb4_0900_ai_ci CHARACTER SET utf8mb4)"; match mysql().one_statement_parses_to(sql, canonical) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { assert_eq!(name.to_string(), "tb"); assert_eq!( vec![ColumnDef { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index d68ebd556..5ac421da0 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -317,7 +317,7 @@ fn parse_create_table_with_defaults() { active int NOT NULL ) WITH (fillfactor = 20, user_catalog_table = true, autovacuum_vacuum_threshold = 100)"; match pg_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -327,7 +327,7 @@ fn parse_create_table_with_defaults() { file_format: None, location: None, .. - } => { + }) => { use pretty_assertions::assert_eq; assert_eq!("public.customer", name.to_string()); assert_eq!( @@ -537,12 +537,12 @@ fn parse_create_table_constraints_only() { let sql = "CREATE TABLE t (CONSTRAINT positive CHECK (2 > 1))"; let ast = pg_and_generic().verified_stmt(sql); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, .. 
- } => { + }) => { assert_eq!("t", name.to_string()); assert!(columns.is_empty()); assert_eq!( @@ -648,7 +648,7 @@ fn parse_alter_table_alter_column_add_generated() { "ALTER TABLE t ALTER COLUMN id ADD GENERATED ( INCREMENT 1 MINVALUE 1 )", ); assert_eq!( - ParserError::ParserError("Expected AS, found: (".to_string()), + ParserError::ParserError("Expected: AS, found: (".to_string()), res.unwrap_err() ); @@ -656,14 +656,14 @@ fn parse_alter_table_alter_column_add_generated() { "ALTER TABLE t ALTER COLUMN id ADD GENERATED AS IDENTITY ( INCREMENT )", ); assert_eq!( - ParserError::ParserError("Expected a value, found: )".to_string()), + ParserError::ParserError("Expected: a value, found: )".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("ALTER TABLE t ALTER COLUMN id ADD GENERATED AS IDENTITY ("); assert_eq!( - ParserError::ParserError("Expected ), found: EOF".to_string()), + ParserError::ParserError("Expected: ), found: EOF".to_string()), res.unwrap_err() ); } @@ -713,16 +713,88 @@ fn parse_alter_table_add_columns() { } } +#[test] +fn parse_alter_table_owner_to() { + struct TestCase { + sql: &'static str, + expected_owner: Owner, + } + + let test_cases = vec![ + TestCase { + sql: "ALTER TABLE tab OWNER TO new_owner", + expected_owner: Owner::Ident(Ident::new("new_owner".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO postgres", + expected_owner: Owner::Ident(Ident::new("postgres".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CREATE", // treats CREATE as an identifier + expected_owner: Owner::Ident(Ident::new("CREATE".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO \"new_owner\"", + expected_owner: Owner::Ident(Ident::with_quote('\"', "new_owner".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CURRENT_USER", + expected_owner: Owner::CurrentUser, + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CURRENT_ROLE", + expected_owner: Owner::CurrentRole, + }, + TestCase { + sql: 
"ALTER TABLE tab OWNER TO SESSION_USER", + expected_owner: Owner::SessionUser, + }, + ]; + + for case in test_cases { + match pg_and_generic().verified_stmt(case.sql) { + Statement::AlterTable { + name, + if_exists: _, + only: _, + operations, + location: _, + } => { + assert_eq!(name.to_string(), "tab"); + assert_eq!( + operations, + vec![AlterTableOperation::OwnerTo { + new_owner: case.expected_owner.clone() + }] + ); + } + _ => unreachable!("Expected an AlterTable statement"), + } + } + + let res = pg().parse_sql_statements("ALTER TABLE tab OWNER TO CREATE FOO"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: FOO".to_string()), + res.unwrap_err() + ); + + let res = pg().parse_sql_statements("ALTER TABLE tab OWNER TO 4"); + assert_eq!( + ParserError::ParserError("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. sql parser error: Expected: identifier, found: 4".to_string()), + res.unwrap_err() + ); +} + #[test] fn parse_create_table_if_not_exists() { let sql = "CREATE TABLE IF NOT EXISTS uk_cities ()"; let ast = pg_and_generic().verified_stmt(sql); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, if_not_exists: true, .. 
- } => { + }) => { assert_eq!("uk_cities", name.to_string()); } _ => unreachable!(), @@ -733,25 +805,25 @@ fn parse_create_table_if_not_exists() { fn parse_bad_if_not_exists() { let res = pg().parse_sql_statements("CREATE TABLE NOT EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + ParserError::ParserError("Expected: end of statement, found: EXISTS".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + ParserError::ParserError("Expected: end of statement, found: EXISTS".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: uk_cities".to_string()), + ParserError::ParserError("Expected: end of statement, found: uk_cities".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF NOT uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: NOT".to_string()), + ParserError::ParserError("Expected: end of statement, found: NOT".to_string()), res.unwrap_err() ); } @@ -1074,8 +1146,9 @@ fn parse_copy_to() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), having: None, named_window: vec![], window_before_qualify: false, @@ -1086,13 +1159,15 @@ fn parse_copy_to() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), to: true, target: CopyTarget::File { @@ -1300,21 +1375,21 @@ fn parse_set() { assert_eq!( pg_and_generic().parse_sql_statements("SET"), 
Err(ParserError::ParserError( - "Expected identifier, found: EOF".to_string() + "Expected: identifier, found: EOF".to_string() )), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a b"), Err(ParserError::ParserError( - "Expected equals sign or TO, found: b".to_string() + "Expected: equals sign or TO, found: b".to_string() )), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a ="), Err(ParserError::ParserError( - "Expected variable value, found: EOF".to_string() + "Expected: variable value, found: EOF".to_string() )), ); } @@ -1557,7 +1632,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::CompoundIdentifier(vec!["EXCLUDED".into(), "dname".into()]) },], selection: None @@ -1588,14 +1663,14 @@ fn parse_pg_on_conflict() { OnConflictAction::DoUpdate(DoUpdate { assignments: vec![ Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::CompoundIdentifier(vec![ "EXCLUDED".into(), "dname".into() ]) }, Assignment { - id: vec!["area".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["area".into()])), value: Expr::CompoundIdentifier(vec!["EXCLUDED".into(), "area".into()]) }, ], @@ -1645,7 +1720,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::Value(Value::Placeholder("$1".to_string())) },], selection: Some(Expr::BinaryOp { @@ -1682,7 +1757,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::Value(Value::Placeholder("$1".to_string())) },], selection: 
Some(Expr::BinaryOp { @@ -1757,6 +1832,29 @@ fn parse_pg_returning() { }; } +fn test_operator(operator: &str, dialect: &TestedDialects, expected: BinaryOperator) { + let operator_tokens = + sqlparser::tokenizer::Tokenizer::new(&PostgreSqlDialect {}, &format!("a{operator}b")) + .tokenize() + .unwrap(); + assert_eq!( + operator_tokens.len(), + 3, + "binary op should be 3 tokens, not {operator_tokens:?}" + ); + let expected_expr = Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: expected, + right: Box::new(Expr::Identifier(Ident::new("b"))), + }; + let str_expr_canonical = format!("a {operator} b"); + assert_eq!(expected_expr, dialect.verified_expr(&str_expr_canonical)); + assert_eq!( + expected_expr, + dialect.expr_parses_to(&format!("a{operator}b"), &str_expr_canonical) + ); +} + #[test] fn parse_pg_binary_ops() { let binary_ops = &[ @@ -1770,18 +1868,73 @@ fn parse_pg_binary_ops() { ]; for (str_op, op, dialects) in binary_ops { - let select = dialects.verified_only_select(&format!("SELECT a {} b", &str_op)); - assert_eq!( - SelectItem::UnnamedExpr(Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("a"))), - op: op.clone(), - right: Box::new(Expr::Identifier(Ident::new("b"))), - }), - select.projection[0] - ); + test_operator(str_op, dialects, op.clone()); + } +} + +#[test] +fn parse_pg_custom_binary_ops() { + // Postgres supports declaring custom binary operators, using any character in the following set: + // + - * / < > = ~ ! @ # % ^ & | ` ? + + // Here, we test the ones used by common extensions + let operators = [ + // PostGIS + "&&&", // n-D bounding boxes intersect + "&<", // (is strictly to the left of) + "&>", // (is strictly to the right of) + "|=|", // distance between A and B trajectories at their closest point of approach + "<<#>>", // n-D distance between A and B bounding boxes + "|>>", // A's bounding box is strictly above B's. 
+ "~=", // bounding box is the same + // PGroonga + "&@", // Full text search by a keyword + "&@~", // Full text search by easy to use query language + "&@*", // Similar search + "&`", // Advanced search by ECMAScript like query language + "&@|", // Full text search by an array of keywords + "&@~|", // Full text search by an array of queries in easy to use query language + // pgtrgm + "<<%", // second argument has a continuous extent of an ordered trigram set that matches word boundaries + "%>>", // commutator of <<% + "<<<->", // distance between arguments + // hstore + "#=", // Replace fields with matching values from hstore + // ranges + "-|-", // Is adjacent to + // pg_similarity + "~++", // L1 distance + "~##", // Cosine Distance + "~-~", // Dice Coefficient + "~!!", // Euclidean Distance + "~@~", // Hamming Distance + "~??", // Jaccard Coefficient + "~%%", // Jaro Distance + "~@@", // Jaro-Winkler Distance + "~==", // Levenshtein Distance + "~^^", // Matching Coefficient + "~||", // Monge-Elkan Coefficient + "~#~", // Needleman-Wunsch Coefficient + "~**", // Overlap Coefficient + "~~~", // Q-Gram Distance + "~=~", // Smith-Waterman Coefficient + "~!~", // Smith-Waterman-Gotoh Coefficient + "~*~", // Soundex Distance + // soundex_operator + ">@@<", // Soundex matches + "<@@>", // Soundex doesn't match + ]; + for op in &operators { + test_operator(op, &pg(), BinaryOperator::Custom(op.to_string())); } } +#[test] +fn parse_ampersand_arobase() { + // In SQL Server, a&@b means (a) & (@b), in PostgreSQL it means (a) &@ (b) + pg().expr_parses_to("a&@b", "a &@ b"); +} + #[test] fn parse_pg_unary_ops() { let pg_unary_ops = &[ @@ -1873,9 +2026,11 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexes: vec![num[0].clone()], + &Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("foo"))), + 
subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1883,9 +2038,16 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0][0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexes: vec![num[0].clone(), num[0].clone()], + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("foo"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), + }), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1893,19 +2055,27 @@ fn parse_array_index_expr() { let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("bar"))), - indexes: vec![ - num[0].clone(), - Expr::Identifier(Ident { - value: "baz".to_string(), - quote_style: Some('"') + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("bar"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }) }), - Expr::Identifier(Ident { + subscript: Box::new(Subscript::Index { + index: Expr::Identifier(Ident { + value: "baz".to_string(), + quote_style: Some('"') + }) + }) + }), + subscript: Box::new(Subscript::Index { + index: Expr::Identifier(Ident { value: "fooz".to_string(), quote_style: Some('"') }) - ], + }) }, expr_from_projection(only(&select.projection)), ); @@ -1913,26 +2083,33 @@ fn parse_array_index_expr() { let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Nested(Box::new(Expr::Cast { - kind: CastKind::Cast, - expr: Box::new(Expr::Array(Array { - 
elem: vec![Expr::Array(Array { - elem: vec![num[2].clone(), num[3].clone(),], + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![num[2].clone(), num[3].clone(),], + named: true, + })], named: true, - })], - named: true, - })), - data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Int(None)), + })), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Int(None)), + None + ))), None - ))), - None - )), - format: None, - }))), - indexes: vec![num[1].clone(), num[2].clone()], + )), + format: None, + }))), + subscript: Box::new(Subscript::Index { + index: num[1].clone() + }), + }), + subscript: Box::new(Subscript::Index { + index: num[2].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1948,11 +2125,121 @@ fn parse_array_index_expr() { ); } +#[test] +fn parse_array_subscript() { + let tests = [ + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2]", + Subscript::Index { + index: Expr::Value(number("2")), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[foo]", + Subscript::Index { + index: Expr::Identifier(Ident::new("foo")), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:5]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: Some(Expr::Value(number("5"))), + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:5:3]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: Some(Expr::Value(number("5"))), + stride: Some(Expr::Value(number("3"))), + }, + ), + ( + "arr[array_length(arr) - 3:array_length(arr) - 1]", + Subscript::Slice { + lower_bound: Some(Expr::BinaryOp { + left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])), + op: 
BinaryOperator::Minus, + right: Box::new(Expr::Value(number("3"))), + }), + upper_bound: Some(Expr::BinaryOp { + left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("1"))), + }), + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[:5]", + Subscript::Slice { + lower_bound: None, + upper_bound: Some(Expr::Value(number("5"))), + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: None, + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[:]", + Subscript::Slice { + lower_bound: None, + upper_bound: None, + stride: None, + }, + ), + ]; + for (sql, expect) in tests { + let Expr::Subscript { subscript, .. } = pg_and_generic().verified_expr(sql) else { + panic!("expected subscript expr"); + }; + assert_eq!(expect, *subscript); + } + + pg_and_generic().verified_expr("schedule[:2][2:]"); +} + +#[test] +fn parse_array_multi_subscript() { + let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]"); + assert_eq!( + Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(call( + "make_array", + vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")) + ] + )), + subscript: Box::new(Subscript::Slice { + lower_bound: Some(Expr::Value(number("1"))), + upper_bound: Some(Expr::Value(number("2"))), + stride: None, + }), + }), + subscript: Box::new(Subscript::Index { + index: Expr::Value(number("2")), + }), + }, + expr, + ); +} + #[test] fn parse_create_index() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -1963,7 +2250,7 @@ fn parse_create_index() { nulls_distinct: None, include, predicate: None, - } => { + }) => { 
assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -1981,7 +2268,7 @@ fn parse_create_index() { fn parse_create_anonymous_index() { let sql = "CREATE INDEX ON my_table(col1,col2)"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name, table_name: ObjectName(table_name), using, @@ -1992,7 +2279,7 @@ fn parse_create_anonymous_index() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq!(None, name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2010,7 +2297,7 @@ fn parse_create_anonymous_index() { fn parse_create_index_concurrently() { let sql = "CREATE INDEX CONCURRENTLY IF NOT EXISTS my_index ON my_table(col1,col2)"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2021,7 +2308,7 @@ fn parse_create_index_concurrently() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2039,7 +2326,7 @@ fn parse_create_index_concurrently() { fn parse_create_index_with_predicate() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) WHERE col3 IS NULL"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2050,7 +2337,7 @@ fn parse_create_index_with_predicate() { include, nulls_distinct: None, predicate: Some(_), - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2068,7 +2355,7 @@ fn parse_create_index_with_predicate() { fn parse_create_index_with_include() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) INCLUDE (col3)"; match 
pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2079,7 +2366,7 @@ fn parse_create_index_with_include() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2097,7 +2384,7 @@ fn parse_create_index_with_include() { fn parse_create_index_with_nulls_distinct() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS NOT DISTINCT"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2108,7 +2395,7 @@ fn parse_create_index_with_nulls_distinct() { include, nulls_distinct: Some(nulls_distinct), predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2124,7 +2411,7 @@ fn parse_create_index_with_nulls_distinct() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS DISTINCT"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2135,7 +2422,7 @@ fn parse_create_index_with_nulls_distinct() { include, nulls_distinct: Some(nulls_distinct), predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2157,6 +2444,7 @@ fn parse_array_subquery_expr() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY")]), + parameters: FunctionArguments::None, args: FunctionArguments::Subquery(Box::new(Query { with: None, body: Box::new(SetExpr::SetOperation { @@ -2169,8 +2457,9 @@ fn parse_array_subquery_expr() { into: None, from: vec![], lateral_views: vec![], + 
prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2188,8 +2477,9 @@ fn parse_array_subquery_expr() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2201,13 +2491,15 @@ fn parse_array_subquery_expr() { connect_by: None, }))), }), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], for_clause: None, + settings: None, + format_clause: None, })), filter: None, null_treatment: None, @@ -2473,7 +2765,7 @@ fn parse_json_table_is_not_reserved() { name: ObjectName(name), .. } => assert_eq!("JSON_TABLE", name[0].value), - other => panic!("Expected JSON_TABLE to be parsed as a table name, but got {other:?}"), + other => panic!("Expected: JSON_TABLE to be parsed as a table name, but got {other:?}"), } } @@ -2517,6 +2809,7 @@ fn test_composite_value() { Ident::new("information_schema"), Ident::new("_pg_expandarray") ]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Array( @@ -2662,7 +2955,7 @@ fn parse_escaped_literal_string() { .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" + "sql parser error: Unterminated encoded string literal at Line: 1, Column: 8" ); let sql = r"SELECT E'\u0001', E'\U0010FFFF', E'\xC', E'\x25', E'\2', E'\45', E'\445'"; @@ -2705,7 +2998,7 @@ fn parse_escaped_literal_string() { .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" + "sql parser error: Unterminated 
encoded string literal at Line: 1, Column: 8" ); } } @@ -2743,6 +3036,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2754,6 +3048,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2765,6 +3060,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2776,6 +3072,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -3204,6 +3501,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -3226,6 +3524,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -3243,7 +3542,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } pg().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); @@ -3285,16 +3584,18 @@ fn parse_create_function() { OperateFunctionArg::unnamed(DataType::Integer(None)), ]), return_type: 
Some(DataType::Integer(None)), - params: CreateFunctionBody { - language: Some("SQL".into()), - behavior: Some(FunctionBehavior::Immutable), - called_on_null: Some(FunctionCalledOnNull::Strict), - parallel: Some(FunctionParallel::Safe), - as_: Some(FunctionDefinition::SingleQuotedDef( - "select $1 + $2;".into() - )), - ..Default::default() - }, + language: Some("SQL".into()), + behavior: Some(FunctionBehavior::Immutable), + called_on_null: Some(FunctionCalledOnNull::Strict), + parallel: Some(FunctionParallel::Safe), + function_body: Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + Value::SingleQuotedString("select $1 + $2;".into()) + ))), + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, } ); } @@ -3403,6 +3704,108 @@ fn parse_drop_function() { ); } +#[test] +fn parse_drop_procedure() { + let sql = "DROP PROCEDURE IF EXISTS test_proc"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc".to_string(), + quote_style: None + }]), + args: None + }], + option: None + } + ); + + let sql = "DROP PROCEDURE IF EXISTS test_proc(a INTEGER, IN b INTEGER = 1)"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Integer(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number("1".parse().unwrap(), false))), + } + ]), + }], + option: None + } + ); + + let sql = "DROP PROCEDURE IF EXISTS test_proc1(a INTEGER, IN b INTEGER = 1), test_proc2(a VARCHAR, IN b INTEGER = 1)"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + 
if_exists: true, + proc_desc: vec![ + DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc1".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Integer(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + ))), + } + ]), + }, + DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc2".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Varchar(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + ))), + } + ]), + } + ], + option: None + } + ); + + let res = pg().parse_sql_statements("DROP PROCEDURE testproc DROP"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: DROP".to_string()), + res.unwrap_err() + ); + + let res = pg().parse_sql_statements("DROP PROCEDURE testproc SET NULL"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: SET".to_string()), + res.unwrap_err() + ); +} + #[test] fn parse_dollar_quoted_string() { let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$"; @@ -3490,14 +3893,17 @@ fn parse_select_group_by_grouping_sets() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, GROUPING SETS ((brand), (size), ())" ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::GroupingSets(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - vec![], - ]), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size")), + Expr::GroupingSets(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + 
vec![Expr::Identifier(Ident::new("size"))], + vec![], + ]), + ], + vec![] + ), select.group_by ); } @@ -3508,13 +3914,16 @@ fn parse_select_group_by_rollup() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, ROLLUP (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::Rollup(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - ]), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size")), + Expr::Rollup(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + ]), + ], + vec![] + ), select.group_by ); } @@ -3525,13 +3934,16 @@ fn parse_select_group_by_cube() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, CUBE (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::Cube(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - ]), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size")), + Expr::Cube(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + ]), + ], + vec![] + ), select.group_by ); } @@ -3565,10 +3977,10 @@ fn parse_create_table_with_alias() { int2_col INT2, float8_col FLOAT8, float4_col FLOAT4, - bool_col BOOL, + bool_col BOOL );"; match pg_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -3578,7 +3990,7 @@ fn parse_create_table_with_alias() { file_format: None, location: None, .. 
- } => { + }) => { assert_eq!("public.datatype_aliases", name.to_string()); assert_eq!( columns, @@ -3627,6 +4039,50 @@ fn parse_create_table_with_alias() { } } +#[test] +fn parse_create_table_with_partition_by() { + let sql = "CREATE TABLE t1 (a INT, b TEXT) PARTITION BY RANGE(a)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("t1", create_table.name.to_string()); + assert_eq!( + vec![ + ColumnDef { + name: "a".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![] + }, + ColumnDef { + name: "b".into(), + data_type: DataType::Text, + collation: None, + options: vec![] + } + ], + create_table.columns + ); + match *create_table.partition_by.unwrap() { + Expr::Function(f) => { + assert_eq!("RANGE", f.name.to_string()); + assert_eq!( + FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + clauses: vec![], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("a")) + ))], + }), + f.args + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + #[test] fn parse_join_constraint_unnest_alias() { assert_eq!( @@ -3643,7 +4099,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -3705,13 +4162,15 @@ fn test_simple_postgres_insert_with_alias() { Expr::Value(Value::Number("123".to_string(), false)) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], @@ -3772,13 +4231,15 @@ fn test_simple_postgres_insert_with_alias() { )) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: 
None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], @@ -3835,13 +4296,15 @@ fn test_simple_insert_with_quoted_alias() { Expr::Value(Value::SingleQuotedString("0123".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], @@ -3882,3 +4345,99 @@ fn parse_mat_cte() { let sql2 = r#"WITH cte AS NOT MATERIALIZED (SELECT id FROM accounts) SELECT id FROM cte"#; pg().verified_stmt(sql2); } + +#[test] +fn parse_at_time_zone() { + pg_and_generic().verified_expr("CURRENT_TIMESTAMP AT TIME ZONE tz"); + pg_and_generic().verified_expr("CURRENT_TIMESTAMP AT TIME ZONE ('America/' || 'Los_Angeles')"); + + // check precedence + let expr = Expr::BinaryOp { + left: Box::new(Expr::AtTimeZone { + timestamp: Box::new(Expr::TypedString { + data_type: DataType::Timestamp(None, TimezoneInfo::None), + value: "2001-09-28 01:00".to_owned(), + }), + time_zone: Box::new(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(Value::SingleQuotedString( + "America/Los_Angeles".to_owned(), + ))), + data_type: DataType::Text, + format: None, + }), + }), + op: BinaryOperator::Plus, + right: Box::new(Expr::Interval(Interval { + value: Box::new(Expr::Value(Value::SingleQuotedString( + "23 hours".to_owned(), + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + })), + }; + pretty_assertions::assert_eq!( + pg_and_generic().verified_expr( + "TIMESTAMP '2001-09-28 01:00' AT TIME ZONE 'America/Los_Angeles'::TEXT + INTERVAL '23 hours'", + ), + expr + ); +} + +#[test] +fn parse_create_table_with_options() { + let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; + match pg().verified_stmt(sql) { + 
Statement::CreateTable(CreateTable { with_options, .. }) => { + assert_eq!( + vec![ + SqlOption { + name: "foo".into(), + value: Expr::Value(Value::SingleQuotedString("bar".into())), + }, + SqlOption { + name: "a".into(), + value: Expr::Value(number("123")), + }, + ], + with_options + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_table_function_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::Table { + ref name, + with_ordinality: true, + .. + } => { + assert_eq!("generate_series", name.to_string().as_str()); + } + _ => panic!("Expecting TableFactor::Table with ordinality"), + } +} + +#[test] +fn test_table_unnest_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM UNNEST([10, 20, 30]) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::UNNEST { + with_ordinality: true, + .. 
+ } => {} + _ => panic!("Expecting TableFactor::UNNEST with ordinality"), + } +} diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 0a5710ff4..440116e02 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -48,6 +48,7 @@ fn test_square_brackets_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -94,6 +95,7 @@ fn test_double_quotes_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -114,6 +116,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -136,6 +139,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 25eaa2f71..7a2288cbb 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -18,7 +18,7 @@ use sqlparser::ast::helpers::stmt_data_loading::{ DataLoadingOption, DataLoadingOptionType, StageLoadSelectItem, }; use sqlparser::ast::*; -use sqlparser::dialect::{GenericDialect, SnowflakeDialect}; +use sqlparser::dialect::{Dialect, GenericDialect, SnowflakeDialect}; use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::tokenizer::*; use test_utils::*; @@ -33,20 +33,293 @@ use pretty_assertions::assert_eq; fn test_snowflake_create_table() { let sql = "CREATE TABLE _my_$table (am00unt number)"; match snowflake_and_generic().verified_stmt(sql) { - Statement::CreateTable { name, .. } => { + Statement::CreateTable(CreateTable { name, .. 
}) => { assert_eq!("_my_$table", name.to_string()); } _ => unreachable!(), } } +#[test] +fn test_snowflake_create_or_replace_table() { + let sql = "CREATE OR REPLACE TABLE my_table (a number)"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, or_replace, .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_or_replace_table_copy_grants() { + let sql = "CREATE OR REPLACE TABLE my_table (a number) COPY GRANTS"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + or_replace, + copy_grants, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + assert!(copy_grants); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_or_replace_table_copy_grants_at_end() { + let sql = "CREATE OR REPLACE TABLE my_table COPY GRANTS (a number) "; + let parsed = "CREATE OR REPLACE TABLE my_table (a number) COPY GRANTS"; + match snowflake().one_statement_parses_to(sql, parsed) { + Statement::CreateTable(CreateTable { + name, + or_replace, + copy_grants, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + assert!(copy_grants); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_or_replace_table_copy_grants_cta() { + let sql = "CREATE OR REPLACE TABLE my_table COPY GRANTS AS SELECT 1 AS a"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + or_replace, + copy_grants, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + assert!(copy_grants); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_enable_schema_evolution() { + let sql = "CREATE TABLE my_table (a number) ENABLE_SCHEMA_EVOLUTION=TRUE"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + enable_schema_evolution, + .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(true), enable_schema_evolution); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_change_tracking() { + let sql = "CREATE TABLE my_table (a number) CHANGE_TRACKING=TRUE"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + change_tracking, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(true), change_tracking); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_data_retention_time_in_days() { + let sql = "CREATE TABLE my_table (a number) DATA_RETENTION_TIME_IN_DAYS=5"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + data_retention_time_in_days, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(5), data_retention_time_in_days); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_max_data_extension_time_in_days() { + let sql = "CREATE TABLE my_table (a number) MAX_DATA_EXTENSION_TIME_IN_DAYS=5"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + max_data_extension_time_in_days, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(5), max_data_extension_time_in_days); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_with_aggregation_policy() { + match snowflake() + .verified_stmt("CREATE TABLE my_table (a number) WITH AGGREGATION POLICY policy_name") + { + Statement::CreateTable(CreateTable { + name, + with_aggregation_policy, + .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("policy_name".to_string()), + with_aggregation_policy.map(|name| name.to_string()) + ); + } + _ => unreachable!(), + } + + match snowflake() + .parse_sql_statements("CREATE TABLE my_table (a number) AGGREGATION POLICY policy_name") + .unwrap() + .pop() + .unwrap() + { + Statement::CreateTable(CreateTable { + name, + with_aggregation_policy, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("policy_name".to_string()), + with_aggregation_policy.map(|name| name.to_string()) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_with_row_access_policy() { + match snowflake().verified_stmt( + "CREATE TABLE my_table (a number, b number) WITH ROW ACCESS POLICY policy_name ON (a)", + ) { + Statement::CreateTable(CreateTable { + name, + with_row_access_policy, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("WITH ROW ACCESS POLICY policy_name ON (a)".to_string()), + with_row_access_policy.map(|policy| policy.to_string()) + ); + } + _ => unreachable!(), + } + + match snowflake() + .parse_sql_statements( + "CREATE TABLE my_table (a number, b number) ROW ACCESS POLICY policy_name ON (a)", + ) + .unwrap() + .pop() + .unwrap() + { + Statement::CreateTable(CreateTable { + name, + with_row_access_policy, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("WITH ROW ACCESS POLICY policy_name ON (a)".to_string()), + with_row_access_policy.map(|policy| policy.to_string()) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_with_tag() { + match snowflake() + .verified_stmt("CREATE TABLE my_table (a number) WITH TAG (A='TAG A', B='TAG B')") + { + Statement::CreateTable(CreateTable { + name, with_tags, .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some(vec![ + Tag::new("A".into(), "TAG A".to_string()), + Tag::new("B".into(), "TAG B".to_string()) + ]), + with_tags + ); + } + _ => unreachable!(), + } + + match snowflake() + .parse_sql_statements("CREATE TABLE my_table (a number) TAG (A='TAG A', B='TAG B')") + .unwrap() + .pop() + .unwrap() + { + Statement::CreateTable(CreateTable { + name, with_tags, .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some(vec![ + Tag::new("A".into(), "TAG A".to_string()), + Tag::new("B".into(), "TAG B".to_string()) + ]), + with_tags + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_default_ddl_collation() { + let sql = "CREATE TABLE my_table (a number) DEFAULT_DDL_COLLATION='de'"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + default_ddl_collation, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some("de".to_string()), default_ddl_collation); + } + _ => unreachable!(), + } +} + #[test] fn test_snowflake_create_transient_table() { let sql = "CREATE TRANSIENT TABLE CUSTOMER (id INT, name VARCHAR(255))"; match snowflake_and_generic().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, transient, .. - } => { + }) => { assert_eq!("CUSTOMER", name.to_string()); assert!(transient) } @@ -54,6 +327,162 @@ fn test_snowflake_create_transient_table() { } } +#[test] +fn test_snowflake_create_table_column_comment() { + let sql = "CREATE TABLE my_table (a STRING COMMENT 'some comment')"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + vec![ColumnDef { + name: "a".into(), + data_type: DataType::String(None), + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Comment("some comment".to_string()) + }], + collation: None + }], + columns + ) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_local_table() { + match snowflake().verified_stmt("CREATE TABLE my_table (a INT)") { + Statement::CreateTable(CreateTable { name, global, .. }) => { + assert_eq!("my_table", name.to_string()); + assert!(global.is_none()) + } + _ => unreachable!(), + } + + match snowflake().verified_stmt("CREATE LOCAL TABLE my_table (a INT)") { + Statement::CreateTable(CreateTable { name, global, .. }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(false), global) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_global_table() { + match snowflake().verified_stmt("CREATE GLOBAL TABLE my_table (a INT)") { + Statement::CreateTable(CreateTable { name, global, .. 
}) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(true), global) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_invalid_local_global_table() { + assert_eq!( + snowflake().parse_sql_statements("CREATE LOCAL GLOBAL TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected: an SQL statement, found: LOCAL".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE GLOBAL LOCAL TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected: an SQL statement, found: GLOBAL".to_string() + )) + ); +} + +#[test] +fn test_snowflake_create_invalid_temporal_table() { + assert_eq!( + snowflake().parse_sql_statements("CREATE TEMP TEMPORARY TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected: an object type after CREATE, found: TEMPORARY".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE TEMP VOLATILE TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected: an object type after CREATE, found: VOLATILE".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE TEMP TRANSIENT TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected: an object type after CREATE, found: TRANSIENT".to_string() + )) + ); +} + +#[test] +fn test_snowflake_create_table_if_not_exists() { + match snowflake().verified_stmt("CREATE TABLE IF NOT EXISTS my_table (a INT)") { + Statement::CreateTable(CreateTable { + name, + if_not_exists, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(if_not_exists) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_cluster_by() { + match snowflake().verified_stmt("CREATE TABLE my_table (a INT) CLUSTER BY (a, b)") { + Statement::CreateTable(CreateTable { + name, cluster_by, .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some(WrappedCollection::Parentheses(vec![ + Ident::new("a"), + Ident::new("b"), + ])), + cluster_by + ) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_comment() { + match snowflake().verified_stmt("CREATE TABLE my_table (a INT) COMMENT = 'some comment'") { + Statement::CreateTable(CreateTable { name, comment, .. }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!("some comment", comment.unwrap().to_string()); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_incomplete_statement() { + assert_eq!( + snowflake().parse_sql_statements("CREATE TABLE my_table"), + Err(ParserError::ParserError( + "unexpected end of input".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE TABLE my_table; (c int)"), + Err(ParserError::ParserError( + "unexpected end of input".to_string() + )) + ); +} + #[test] fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; @@ -91,6 +520,57 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); } +#[test] +fn parse_sf_create_or_replace_view_with_comment_missing_equal() { + assert!(snowflake_and_generic() + .parse_sql_statements("CREATE OR REPLACE VIEW v COMMENT = 'hello, world' AS SELECT 1") + .is_ok()); + + assert!(snowflake_and_generic() + .parse_sql_statements("CREATE OR REPLACE VIEW v COMMENT 'hello, world' AS SELECT 1") + .is_err()); +} + +#[test] +fn parse_sf_create_or_replace_with_comment_for_snowflake() { + let sql = "CREATE OR REPLACE VIEW v COMMENT = 'hello, world' AS SELECT 1"; + let dialect = test_utils::TestedDialects { + dialects: vec![Box::new(SnowflakeDialect {}) as Box], + options: None, + }; + + match dialect.verified_stmt(sql) { + Statement::CreateView { + name, + columns, + or_replace, + options, + query, + materialized, + cluster_by, + comment, + with_no_schema_binding: late_binding, + if_not_exists, 
+ temporary, + .. + } => { + assert_eq!("v", name.to_string()); + assert_eq!(columns, vec![]); + assert_eq!(options, CreateTableOptions::None); + assert_eq!("SELECT 1", query.to_string()); + assert!(!materialized); + assert!(or_replace); + assert_eq!(cluster_by, vec![]); + assert!(comment.is_some()); + assert_eq!(comment.expect("expected comment"), "hello, world"); + assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); + } + _ => unreachable!(), + } +} + #[test] fn test_sf_derived_table_in_parenthesis() { // Nesting a subquery in an extra set of parentheses is non-standard, @@ -344,6 +824,36 @@ fn parse_semi_structured_data_traversal() { })], select.projection ); + + // a json access used as a key to another json access + assert_eq!( + snowflake().verified_expr("a[b:c]"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Bracket { + key: Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("b"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "c".to_owned(), + quoted: false + }] + } + } + }] + } + } + ); + + // unquoted object keys cannot start with a digit + assert_eq!( + snowflake() + .parse_sql_statements("SELECT a:42") + .unwrap_err() + .to_string(), + "sql parser error: Expected: variant object key name, found: 42" + ); } #[test] @@ -360,6 +870,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -382,6 +893,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -399,7 +911,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); 
assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } snowflake().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); @@ -505,6 +1017,44 @@ fn test_select_wildcard_with_rename() { assert_eq!(expected, select.projection[0]); } +#[test] +fn test_select_wildcard_with_replace_and_rename() { + let select = snowflake_and_generic().verified_only_select( + "SELECT * REPLACE (col_z || col_z AS col_z) RENAME (col_z AS col_zz) FROM data", + ); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_replace: Some(ReplaceSelectItem { + items: vec![Box::new(ReplaceSelectElement { + expr: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col_z"))), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Identifier(Ident::new("col_z"))), + }, + column_name: Ident::new("col_z"), + as_keyword: true, + })], + }), + opt_rename: Some(RenameSelectItem::Multiple(vec![IdentWithAlias { + ident: Ident::new("col_z"), + alias: Ident::new("col_zz"), + }])), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); + + // rename cannot precede replace + // https://docs.snowflake.com/en/sql-reference/sql/select#parameters + assert_eq!( + snowflake_and_generic() + .parse_sql_statements( + "SELECT * RENAME (col_z AS col_zz) REPLACE (col_z || col_z AS col_z) FROM data" + ) + .unwrap_err() + .to_string(), + "sql parser error: Expected: end of statement, found: REPLACE" + ); +} + #[test] fn test_select_wildcard_with_exclude_and_rename() { let select = snowflake_and_generic() @@ -520,12 +1070,13 @@ fn test_select_wildcard_with_exclude_and_rename() { assert_eq!(expected, select.projection[0]); // rename cannot precede exclude + // https://docs.snowflake.com/en/sql-reference/sql/select#parameters assert_eq!( snowflake_and_generic() .parse_sql_statements("SELECT * RENAME col_a AS col_b EXCLUDE col_z FROM data") .unwrap_err() .to_string(), - "sql parser error: 
Expected end of statement, found: EXCLUDE" + "sql parser error: Expected: end of statement, found: EXCLUDE" ); } @@ -625,13 +1176,13 @@ fn parse_snowflake_declare_cursor() { let error_sql = "DECLARE c1 CURSOR SELECT id FROM invoices"; assert_eq!( - ParserError::ParserError("Expected FOR, found: SELECT".to_owned()), + ParserError::ParserError("Expected: FOR, found: SELECT".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE c1 CURSOR res"; assert_eq!( - ParserError::ParserError("Expected FOR, found: res".to_owned()), + ParserError::ParserError("Expected: FOR, found: res".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -679,13 +1230,13 @@ fn parse_snowflake_declare_result_set() { let error_sql = "DECLARE res RESULTSET DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE res RESULTSET :="; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -771,19 +1322,19 @@ fn parse_snowflake_declare_variable() { let error_sql = "DECLARE profit INT 2"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: 2".to_owned()), + ParserError::ParserError("Expected: end of statement, found: 2".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE profit INT DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE profit DEFAULT"; assert_eq!( - 
ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -818,7 +1369,7 @@ fn parse_snowflake_declare_multi_statements() { let error_sql = "DECLARE profit DEFAULT 42 c1 CURSOR FOR res;"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: c1".to_owned()), + ParserError::ParserError("Expected: end of statement, found: c1".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1393,7 +1944,7 @@ fn test_snowflake_trim() { // missing comma separation let error_sql = "SELECT TRIM('xyz' 'a')"; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1555,7 +2106,7 @@ fn test_select_wildcard_with_ilike_double_quote() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE "%id" FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected ilike pattern, found: \"%id\"" + "sql parser error: Expected: ilike pattern, found: \"%id\"" ); } @@ -1564,7 +2115,7 @@ fn test_select_wildcard_with_ilike_number() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE 42 FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected ilike pattern, found: 42" + "sql parser error: Expected: ilike pattern, found: 42" ); } @@ -1573,6 +2124,135 @@ fn test_select_wildcard_with_ilike_replace() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE '%id%' EXCLUDE col FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected end of statement, found: EXCLUDE" + "sql parser error: Expected: end of statement, found: EXCLUDE" + ); +} + +#[test] +fn first_value_ignore_nulls() { + snowflake().verified_only_select(concat!( + "SELECT FIRST_VALUE(column2 
IGNORE NULLS) ", + "OVER (PARTITION BY column1 ORDER BY column2) ", + "FROM some_table" + )); +} + +#[test] +fn test_pivot() { + // pivot on static list of values with default + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM quarterly_sales ", + "PIVOT(SUM(amount) ", + "FOR quarter IN (", + "'2023_Q1', ", + "'2023_Q2', ", + "'2023_Q3', ", + "'2023_Q4', ", + "'2024_Q1') ", + "DEFAULT ON NULL (0)", + ") ", + "ORDER BY empid", + )); + + // dynamic pivot from subquery + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM quarterly_sales ", + "PIVOT(SUM(amount) FOR quarter IN (", + "SELECT DISTINCT quarter ", + "FROM ad_campaign_types_by_quarter ", + "WHERE television = true ", + "ORDER BY quarter)", + ") ", + "ORDER BY empid", + )); + + // dynamic pivot on any value (with order by) + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM quarterly_sales ", + "PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ", + "ORDER BY empid", + )); + + // dynamic pivot on any value (without order by) + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM sales_data ", + "PIVOT(SUM(total_sales) FOR fis_quarter IN (ANY)) ", + "WHERE fis_year IN (2023) ", + "ORDER BY region", + )); +} + +#[test] +fn asof_joins() { + #[rustfmt::skip] + let query = snowflake_and_generic().verified_only_select(concat!( + "SELECT * ", + "FROM trades_unixtime AS tu ", + "ASOF JOIN quotes_unixtime AS qu ", + "MATCH_CONDITION (tu.trade_time >= qu.quote_time)", + )); + + assert_eq!( + query.from[0], + TableWithJoins { + relation: table_with_alias("trades_unixtime", "tu"), + joins: vec![Join { + relation: table_with_alias("quotes_unixtime", "qu"), + join_operator: JoinOperator::AsOf { + match_condition: Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("tu"), + Ident::new("trade_time"), + ])), + op: BinaryOperator::GtEq, + right: 
Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("qu"), + Ident::new("quote_time"), + ])), + }, + constraint: JoinConstraint::None, + }, + }], + } ); + + #[rustfmt::skip] + snowflake_and_generic().verified_query(concat!( + "SELECT t.stock_symbol, t.trade_time, t.quantity, q.quote_time, q.price ", + "FROM trades AS t ASOF JOIN quotes AS q ", + "MATCH_CONDITION (t.trade_time >= quote_time) ", + "ON t.stock_symbol = q.stock_symbol ", + "ORDER BY t.stock_symbol", + )); + + #[rustfmt::skip] + snowflake_and_generic().verified_query(concat!( + "SELECT t.stock_symbol, c.company_name, t.trade_time, t.quantity, q.quote_time, q.price ", + "FROM trades AS t ASOF JOIN quotes AS q ", + "MATCH_CONDITION (t.trade_time <= quote_time) ", + "USING(stock_symbol) ", + "JOIN companies AS c ON c.stock_symbol = t.stock_symbol ", + "ORDER BY t.stock_symbol", + )); + + #[rustfmt::skip] + snowflake_and_generic().verified_query(concat!( + "SELECT * ", + "FROM snowtime AS s ", + "ASOF JOIN raintime AS r ", + "MATCH_CONDITION (s.observed >= r.observed) ", + "ON s.state = r.state ", + "ASOF JOIN preciptime AS p ", + "MATCH_CONDITION (s.observed >= p.observed) ", + "ON s.state = p.state ", + "ORDER BY s.observed", + )); } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 5742754c0..629ab5fc2 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -122,11 +122,11 @@ fn pragma_eq_placeholder_style() { fn parse_create_table_without_rowid() { let sql = "CREATE TABLE t (a INT) WITHOUT ROWID"; match sqlite_and_generic().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, without_rowid: true, .. - } => { + }) => { assert_eq!("t", name.to_string()); } _ => unreachable!(), @@ -167,9 +167,11 @@ fn parse_create_view_temporary_if_not_exists() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, + .. 
} => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -178,6 +180,7 @@ fn parse_create_view_temporary_if_not_exists() { assert!(!or_replace); assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(if_not_exists); assert!(temporary); @@ -198,7 +201,7 @@ fn double_equality_operator() { fn parse_create_table_auto_increment() { let sql = "CREATE TABLE foo (bar INT PRIMARY KEY AUTOINCREMENT)"; match sqlite_and_generic().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -232,7 +235,7 @@ fn parse_create_table_auto_increment() { fn parse_create_sqlite_quote() { let sql = "CREATE TABLE `PRIMARY` (\"KEY\" INT, [INDEX] INT)"; match sqlite().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "`PRIMARY`"); assert_eq!( vec![ @@ -293,7 +296,7 @@ fn test_placeholder() { #[test] fn parse_create_table_with_strict() { let sql = "CREATE TABLE Fruits (id TEXT NOT NULL PRIMARY KEY) STRICT"; - if let Statement::CreateTable { name, strict, .. } = sqlite().verified_stmt(sql) { + if let Statement::CreateTable(CreateTable { name, strict, .. 
}) = sqlite().verified_stmt(sql) { assert_eq!(name.to_string(), "Fruits"); assert!(strict); } @@ -332,6 +335,7 @@ fn parse_window_function_with_filter() { select.projection, vec![SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new(func_name)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -371,6 +375,41 @@ fn parse_attach_database() { } } +#[test] +fn parse_update_tuple_row_values() { + // See https://github.com/sqlparser-rs/sqlparser-rs/issues/1311 + assert_eq!( + sqlite().verified_stmt("UPDATE x SET (a, b) = (1, 2)"), + Statement::Update { + assignments: vec![Assignment { + target: AssignmentTarget::Tuple(vec![ + ObjectName(vec![Ident::new("a"),]), + ObjectName(vec![Ident::new("b"),]), + ]), + value: Expr::Tuple(vec![ + Expr::Value(Value::Number("1".parse().unwrap(), false)), + Expr::Value(Value::Number("2".parse().unwrap(), false)) + ]) + }], + selection: None, + table: TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("x")]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + }, + joins: vec![], + }, + from: None, + returning: None + } + ); +} + #[test] fn parse_where_in_empty_list() { let sql = "SELECT * FROM t1 WHERE a IN ()"; @@ -392,7 +431,7 @@ fn invalid_empty_list() { let sql = "SELECT * FROM t1 WHERE a IN (,,)"; let sqlite = sqlite_with_options(ParserOptions::new().with_trailing_commas(true)); assert_eq!( - "sql parser error: Expected an expression:, found: ,", + "sql parser error: Expected: an expression:, found: ,", sqlite.parse_sql_statements(sql).unwrap_err().to_string() ); } @@ -416,17 +455,17 @@ fn parse_start_transaction_with_modifier() { }; let res = unsupported_dialects.parse_sql_statements("BEGIN DEFERRED"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: 
DEFERRED".to_string()), + ParserError::ParserError("Expected: end of statement, found: DEFERRED".to_string()), res.unwrap_err(), ); let res = unsupported_dialects.parse_sql_statements("BEGIN IMMEDIATE"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: IMMEDIATE".to_string()), + ParserError::ParserError("Expected: end of statement, found: IMMEDIATE".to_string()), res.unwrap_err(), ); let res = unsupported_dialects.parse_sql_statements("BEGIN EXCLUSIVE"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXCLUSIVE".to_string()), + ParserError::ParserError("Expected: end of statement, found: EXCLUSIVE".to_string()), res.unwrap_err(), ); }