diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index b5744e863..d22b45c08 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -35,6 +35,8 @@ jobs:
       - uses: actions/checkout@v4
       - name: Setup Rust Toolchain
         uses: ./.github/actions/setup-builder
+        with:
+          rust-version: "1.86.0"
       - run: cargo clippy --all-targets --all-features -- -D warnings
 
   benchmark-lint:
@@ -43,6 +45,8 @@ jobs:
       - uses: actions/checkout@v4
      - name: Setup Rust Toolchain
         uses: ./.github/actions/setup-builder
+        with:
+          rust-version: "1.86.0"
       - run: cd sqlparser_bench && cargo clippy --all-targets --all-features -- -D warnings
 
   compile:
diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index bb85eb06c..65355559a 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -820,7 +820,7 @@ impl fmt::Display for AlterColumnOperation {
             AlterColumnOperation::SetDefault { value } => {
                 write!(f, "SET DEFAULT {value}")
             }
-            AlterColumnOperation::DropDefault {} => {
+            AlterColumnOperation::DropDefault => {
                 write!(f, "DROP DEFAULT")
             }
             AlterColumnOperation::SetDataType { data_type, using } => {
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 139d6af22..65248924a 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -651,17 +651,17 @@ pub enum Expr {
     /// such as maps, arrays, and lists:
     /// - Array
     ///   - A 1-dim array `a[1]` will be represented like:
-    ///       `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]`
+    ///     `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]`
     ///   - A 2-dim array `a[1][2]` will be represented like:
-    ///       `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]`
+    ///     `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]`
     /// - Map or Struct (Bracket-style)
     ///   - A map `a['field1']` will be represented like:
-    ///       `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]`
+    ///     `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]`
     ///   - A 2-dim map `a['field1']['field2']` will be represented like:
-    ///       `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]`
+    ///     `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]`
     /// - Struct (Dot-style) (only effect when the chain contains both subscript and expr)
     ///   - A struct access `a[field1].field2` will be represented like:
-    ///       `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]`
+    ///     `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]`
     /// - If a struct access likes `a.field1.field2`, it will be represented by CompoundIdentifier([a, field1, field2])
     CompoundFieldAccess {
         root: Box<Expr>,
@@ -3283,6 +3283,18 @@ pub enum Statement {
         option: Option<ReferentialAction>,
     },
     /// ```sql
+    /// CREATE EXTERNAL VOLUME
+    /// ```
+    /// See <https://docs.snowflake.com/en/sql-reference/sql/create-external-volume>
+    CreateExternalVolume {
+        or_replace: bool,
+        if_not_exists: bool,
+        name: ObjectName,
+        storage_locations: Vec<CloudProviderParams>,
+        allow_writes: Option<bool>,
+        comment: Option<String>,
+    },
+    /// ```sql
     /// CREATE PROCEDURE
     /// ```
     CreateProcedure {
@@ -4171,6 +4183,39 @@ impl fmt::Display for Statement {
                 }
                 Ok(())
             }
+            Statement::CreateExternalVolume {
+                or_replace,
+                if_not_exists,
+                name,
+                storage_locations,
+                allow_writes,
+                comment,
+            } => {
+                write!(
+                    f,
+                    "CREATE {or_replace}EXTERNAL VOLUME {if_not_exists}{name}",
+                    or_replace = if *or_replace { "OR REPLACE " } else { "" },
+                    if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" },
+                )?;
+                if !storage_locations.is_empty() {
+                    write!(
+                        f,
+                        " STORAGE_LOCATIONS = ({})",
+                        storage_locations
+                            .iter()
+                            .map(|loc| format!("({})", loc))
+                            .collect::<Vec<String>>()
+                            .join(", ")
+                    )?;
+                }
+                if let Some(allow_writes) = allow_writes {
+                    let value = if *allow_writes { "TRUE" } else { "FALSE" };
+                    write!(f, " ALLOW_WRITES = {value}")?;
+                }
+                if let Some(c) = comment {
+                    write!(f, " COMMENT = '{c}'")?;
+                }
+                Ok(())
+            }
             Statement::CreateProcedure {
                 name,
                 or_alter,
@@ -7314,7 +7359,7 @@ impl fmt::Display for CopyTarget {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         use CopyTarget::*;
         match self {
-            Stdin { .. } => write!(f, "STDIN"),
+            Stdin => write!(f, "STDIN"),
             Stdout => write!(f, "STDOUT"),
             File { filename } => write!(f, "'{}'", value::escape_single_quote_string(filename)),
             Program { command } => write!(
@@ -8871,6 +8916,74 @@ impl fmt::Display for NullInclusion {
     }
 }
 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct CloudProviderParams {
+    pub name: String,
+    pub provider: String,
+    pub base_url: Option<String>,
+    pub aws_role_arn: Option<String>,
+    pub aws_access_point_arn: Option<String>,
+    pub aws_external_id: Option<String>,
+    pub azure_tenant_id: Option<String>,
+    pub storage_endpoint: Option<String>,
+    pub use_private_link_endpoint: Option<bool>,
+    pub encryption: KeyValueOptions,
+    pub credentials: KeyValueOptions,
+}
+
+impl fmt::Display for CloudProviderParams {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "NAME = '{}' STORAGE_PROVIDER = '{}'",
+            self.name, self.provider
+        )?;
+
+        if let Some(base_url) = &self.base_url {
+            write!(f, " STORAGE_BASE_URL = '{base_url}'")?;
+        }
+
+        if let Some(arn) = &self.aws_role_arn {
+            write!(f, " STORAGE_AWS_ROLE_ARN = '{arn}'")?;
+        }
+
+        if let Some(ap_arn) = &self.aws_access_point_arn {
+            write!(f, " STORAGE_AWS_ACCESS_POINT_ARN = '{ap_arn}'")?;
+        }
+
+        if let Some(ext_id) = &self.aws_external_id {
+            write!(f, " STORAGE_AWS_EXTERNAL_ID = '{ext_id}'")?;
+        }
+
+        if let Some(tenant_id) = &self.azure_tenant_id {
+            write!(f, " AZURE_TENANT_ID = '{tenant_id}'")?;
+        }
+
+        if let Some(endpoint) = &self.storage_endpoint {
+            write!(f, " STORAGE_ENDPOINT = '{endpoint}'")?;
+        }
+
+        if let Some(use_pl) = self.use_private_link_endpoint {
+            write!(
+                f,
+                " USE_PRIVATELINK_ENDPOINT = {}",
+                if use_pl { "TRUE" } else { "FALSE" }
+            )?;
+        }
+
+        if !self.encryption.options.is_empty() {
+            write!(f, " ENCRYPTION=({})", self.encryption)?;
+        }
+
+        if !self.credentials.options.is_empty() {
+            write!(f, " CREDENTIALS=({})", self.credentials)?;
+        }
+        Ok(())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 5827022ab..a88147f23 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -251,6 +251,7 @@ impl Spanned for Values {
 /// - [Statement::CreateFunction]
 /// - [Statement::CreateTrigger]
 /// - [Statement::DropTrigger]
+/// - [Statement::CreateExternalVolume]
 /// - [Statement::CreateProcedure]
 /// - [Statement::CreateMacro]
 /// - [Statement::CreateStage]
@@ -468,6 +469,7 @@ impl Spanned for Statement {
             Statement::CreateFunction { .. } => Span::empty(),
             Statement::CreateTrigger { .. } => Span::empty(),
             Statement::DropTrigger { .. } => Span::empty(),
+            Statement::CreateExternalVolume { .. } => Span::empty(),
             Statement::CreateProcedure { .. } => Span::empty(),
             Statement::CreateMacro { .. } => Span::empty(),
             Statement::CreateStage { .. } => Span::empty(),
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index d455fdf11..d458dd803 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use super::keywords::RESERVED_FOR_IDENTIFIER;
 #[cfg(not(feature = "std"))]
 use crate::alloc::string::ToString;
 use crate::ast::helpers::key_value_options::{KeyValueOption, KeyValueOptionType, KeyValueOptions};
@@ -24,10 +25,11 @@ use crate::ast::helpers::stmt_data_loading::{
     FileStagingCommand, StageLoadSelectItem, StageParamsObject,
 };
 use crate::ast::{
-    CatalogSyncNamespaceMode, ColumnOption, ColumnPolicy, ColumnPolicyProperty, ContactEntry,
-    CopyIntoSnowflakeKind, Ident, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind,
-    IdentityPropertyKind, IdentityPropertyOrder, ObjectName, RowAccessPolicy, ShowObjects,
-    Statement, StorageSerializationPolicy, TagsColumnOption, WrappedCollection,
+    CatalogSyncNamespaceMode, CloudProviderParams, ColumnOption, ColumnPolicy,
+    ColumnPolicyProperty, ContactEntry, CopyIntoSnowflakeKind, Ident, IdentityParameters,
+    IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder,
+    ObjectName, RowAccessPolicy, ShowObjects, Statement, StorageSerializationPolicy,
+    TagsColumnOption, WrappedCollection,
 };
 use crate::dialect::{Dialect, Precedence};
 use crate::keywords::Keyword;
@@ -42,8 +44,6 @@ use alloc::vec::Vec;
 #[cfg(not(feature = "std"))]
 use alloc::{format, vec};
 
-use super::keywords::RESERVED_FOR_IDENTIFIER;
-
 /// A [`Dialect`] for [Snowflake](https://www.snowflake.com/)
 #[derive(Debug, Default)]
 pub struct SnowflakeDialect;
@@ -179,6 +179,8 @@ impl Dialect for SnowflakeDialect {
             ));
         } else if parser.parse_keyword(Keyword::DATABASE) {
             return Some(parse_create_database(or_replace, transient, parser));
+        } else if parser.parse_keywords(&[Keyword::EXTERNAL, Keyword::VOLUME]) {
+            return Some(parse_create_external_volume(or_replace, parser));
         } else {
             // need to go back with the cursor
             let mut back = 1;
@@ -702,6 +704,146 @@ pub fn parse_create_database(
     Ok(builder.build())
 }
 
+fn parse_create_external_volume(
+    or_replace: bool,
+    parser: &mut Parser,
+) -> Result<Statement, ParserError> {
+    let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
+    let name = parser.parse_object_name(false)?;
+    let mut comment = None;
+    let mut allow_writes = None;
+    let mut storage_locations = Vec::new();
+
+    // STORAGE_LOCATIONS (...)
+    if parser.parse_keywords(&[Keyword::STORAGE_LOCATIONS]) {
+        parser.expect_token(&Token::Eq)?;
+        storage_locations = parse_storage_locations(parser)?;
+    }
+
+    // ALLOW_WRITES [ = true | false ]
+    if parser.parse_keyword(Keyword::ALLOW_WRITES) {
+        parser.expect_token(&Token::Eq)?;
+        allow_writes = Some(parser.parse_boolean_string()?);
+    }
+
+    // COMMENT = '...'
+    if parser.parse_keyword(Keyword::COMMENT) {
+        parser.expect_token(&Token::Eq)?;
+        comment = Some(parser.parse_literal_string()?);
+    }
+
+    if storage_locations.is_empty() {
+        return Err(ParserError::ParserError(
+            "STORAGE_LOCATIONS is required for CREATE EXTERNAL VOLUME".to_string(),
+        ));
+    }
+
+    Ok(Statement::CreateExternalVolume {
+        or_replace,
+        if_not_exists,
+        name,
+        allow_writes,
+        comment,
+        storage_locations,
+    })
+}
+
+fn parse_storage_locations(parser: &mut Parser) -> Result<Vec<CloudProviderParams>, ParserError> {
+    let mut locations = Vec::new();
+    parser.expect_token(&Token::LParen)?;
+
+    loop {
+        parser.expect_token(&Token::LParen)?;
+
+        // START OF ONE CloudProviderParams BLOCK
+        let mut name = None;
+        let mut provider = None;
+        let mut base_url = None;
+        let mut aws_role_arn = None;
+        let mut aws_access_point_arn = None;
+        let mut aws_external_id = None;
+        let mut azure_tenant_id = None;
+        let mut storage_endpoint = None;
+        let mut use_private_link_endpoint = None;
+        let mut encryption: KeyValueOptions = KeyValueOptions { options: vec![] };
+        let mut credentials: KeyValueOptions = KeyValueOptions { options: vec![] };
+
+        loop {
+            if parser.parse_keyword(Keyword::NAME) {
+                parser.expect_token(&Token::Eq)?;
+                name = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::STORAGE_PROVIDER) {
+                parser.expect_token(&Token::Eq)?;
+                provider = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::STORAGE_BASE_URL) {
+                parser.expect_token(&Token::Eq)?;
+                base_url = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::STORAGE_AWS_ROLE_ARN) {
+                parser.expect_token(&Token::Eq)?;
+                aws_role_arn = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::STORAGE_AWS_ACCESS_POINT_ARN) {
+                parser.expect_token(&Token::Eq)?;
+                aws_access_point_arn = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::STORAGE_AWS_EXTERNAL_ID) {
+                parser.expect_token(&Token::Eq)?;
+                aws_external_id = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::AZURE_TENANT_ID) {
+                parser.expect_token(&Token::Eq)?;
+                azure_tenant_id = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::STORAGE_ENDPOINT) {
+                parser.expect_token(&Token::Eq)?;
+                storage_endpoint = Some(parser.parse_literal_string()?);
+            } else if parser.parse_keyword(Keyword::USE_PRIVATELINK_ENDPOINT) {
+                parser.expect_token(&Token::Eq)?;
+                use_private_link_endpoint = Some(parser.parse_boolean_string()?);
+            } else if parser.parse_keyword(Keyword::ENCRYPTION) {
+                parser.expect_token(&Token::Eq)?;
+                encryption = KeyValueOptions {
+                    options: parse_parentheses_options(parser)?,
+                };
+            } else if parser.parse_keyword(Keyword::CREDENTIALS) {
+                parser.expect_token(&Token::Eq)?;
+                credentials = KeyValueOptions {
+                    options: parse_parentheses_options(parser)?,
+                };
+            } else if parser.consume_token(&Token::RParen) {
+                break;
+            } else {
+                return parser.expected("a valid key or closing paren", parser.peek_token());
+            }
+        }
+
+        let Some(name) = name else {
+            return parser.expected("NAME = '...'", parser.peek_token());
+        };
+
+        let Some(provider) = provider else {
+            return parser.expected("STORAGE_PROVIDER = '...'", parser.peek_token());
+        };
+
+        locations.push(CloudProviderParams {
+            name,
+            provider,
+            base_url,
+            aws_role_arn,
+            aws_access_point_arn,
+            aws_external_id,
+            azure_tenant_id,
+            storage_endpoint,
+            use_private_link_endpoint,
+            encryption,
+            credentials,
+        });
+        // EXIT if next token is RParen
+        if parser.consume_token(&Token::RParen) {
+            break;
+        }
+        // Otherwise expect a comma before next object
+        parser.expect_token(&Token::Comma)?;
+    }
+    Ok(locations)
+}
+
 pub fn parse_storage_serialization_policy(
     parser: &mut Parser,
 ) -> Result<StorageSerializationPolicy, ParserError> {
diff --git a/src/keywords.rs b/src/keywords.rs
index acaa542d2..128fe1b07 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -18,14 +18,14 @@
 //! This module defines
 //! 1) a list of constants for every keyword
 //! 2) an `ALL_KEYWORDS` array with every keyword in it
-//!     This is not a list of *reserved* keywords: some of these can be
-//!     parsed as identifiers if the parser decides so. This means that
-//!     new keywords can be added here without affecting the parse result.
+//!    This is not a list of *reserved* keywords: some of these can be
+//!    parsed as identifiers if the parser decides so. This means that
+//!    new keywords can be added here without affecting the parse result.
 //!
 //! As a matter of fact, most of these keywords are not used at all
-//!     and could be removed.
+//!    and could be removed.
 //! 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a
-//!     "table alias" context.
+//!    "table alias" context.
 
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
@@ -89,6 +89,7 @@ define_keywords!(
     ALIAS,
     ALL,
     ALLOCATE,
+    ALLOW_WRITES,
     ALTER,
     ALWAYS,
     ANALYZE,
@@ -119,6 +120,7 @@ define_keywords!(
     AUTO_INCREMENT,
     AVG,
     AVRO,
+    AZURE_TENANT_ID,
     BACKWARD,
     BASE64,
     BASE_LOCATION,
@@ -829,7 +831,14 @@ define_keywords!(
     STDIN,
     STDOUT,
     STEP,
+    STORAGE_AWS_ACCESS_POINT_ARN,
+    STORAGE_AWS_EXTERNAL_ID,
+    STORAGE_AWS_ROLE_ARN,
+    STORAGE_BASE_URL,
+    STORAGE_ENDPOINT,
     STORAGE_INTEGRATION,
+    STORAGE_LOCATIONS,
+    STORAGE_PROVIDER,
     STORAGE_SERIALIZATION_POLICY,
     STORED,
     STRICT,
@@ -932,6 +941,7 @@ define_keywords!(
     USE,
     USER,
     USER_RESOURCES,
+    USE_PRIVATELINK_ENDPOINT,
     USING,
     UUID,
     VACUUM,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 9da1799b1..a3d20362d 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -4385,7 +4385,7 @@ impl<'a> Parser<'a> {
             self.parse_create_view(or_replace, temporary, create_view_params)
         } else if self.parse_keyword(Keyword::POLICY) {
             self.parse_create_policy()
-        } else if self.parse_keyword(Keyword::EXTERNAL) {
+        } else if self.parse_keywords(&[Keyword::EXTERNAL, Keyword::TABLE]) {
             self.parse_create_external_table(or_replace)
         } else if self.parse_keyword(Keyword::FUNCTION) {
             self.parse_create_function(or_replace, temporary)
@@ -5226,7 +5226,6 @@ impl<'a> Parser<'a> {
         &mut self,
         or_replace: bool,
     ) -> Result<Statement, ParserError> {
-        self.expect_keyword_is(Keyword::TABLE)?;
         let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
         let table_name = self.parse_object_name(false)?;
         let (columns, constraints) = self.parse_columns()?;
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 5ad553b0c..ff1ef2138 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -14069,8 +14069,7 @@ fn test_table_sample() {
 
 #[test]
 fn overflow() {
-    let expr = std::iter::repeat("1")
-        .take(1000)
+    let expr = std::iter::repeat_n("1", 1000)
         .collect::<Vec<_>>()
         .join(" + ");
     let sql = format!("SELECT {}", expr);
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 8513055a1..b6fb38a03 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -3599,3 +3599,22 @@ fn test_create_database() {
         .to_string();
     assert!(err.contains("Expected"), "Unexpected error: {err}");
 }
+
+#[test]
+fn test_external_volume() {
+    snowflake().verified_stmt("CREATE OR REPLACE EXTERNAL VOLUME exvol STORAGE_LOCATIONS = ((NAME = 'my-s3-us-west-2' STORAGE_PROVIDER = 'S3' STORAGE_BASE_URL = 's3://my-example-bucket/' STORAGE_AWS_ROLE_ARN = 'arn:aws:iam::123456789012:role/myrole' ENCRYPTION=(TYPE='AWS_SSE_KMS' KMS_KEY_ID='1234abcd-12ab-34cd-56ef-1234567890ab'))) ALLOW_WRITES = TRUE");
+    snowflake().verified_stmt("CREATE EXTERNAL VOLUME exvol STORAGE_LOCATIONS = ((NAME = 'my-us-east-1' STORAGE_PROVIDER = 'GCS' STORAGE_BASE_URL = 'gcs://mybucket1/path1/' ENCRYPTION=(TYPE='GCS_SSE_KMS' KMS_KEY_ID='1234abcd-12ab-34cd-56ef-1234567890ab'))) ALLOW_WRITES = TRUE");
+    snowflake().verified_stmt("CREATE EXTERNAL VOLUME exvol STORAGE_LOCATIONS = ((NAME = 'my-azure-northeurope' STORAGE_PROVIDER = 'AZURE' STORAGE_BASE_URL = 'azure://exampleacct.blob.core.windows.net/my_container_northeurope/' AZURE_TENANT_ID = 'a123b4c5-1234-123a-a12b-1a23b45678c9')) ALLOW_WRITES = TRUE");
+    snowflake().verified_stmt("CREATE OR REPLACE EXTERNAL VOLUME ext_vol_s3_compat STORAGE_LOCATIONS = ((NAME = 'my_s3_compat_storage_location' STORAGE_PROVIDER = 'S3COMPAT' STORAGE_BASE_URL = 's3compat://mybucket/unload/mys3compatdata' STORAGE_ENDPOINT = 'example.com' CREDENTIALS=(AWS_KEY_ID='1a2b3c...' AWS_SECRET_KEY='4x5y6z...')))");
+    snowflake().verified_stmt("CREATE OR REPLACE EXTERNAL VOLUME mem STORAGE_LOCATIONS = ((NAME = 'mem' STORAGE_PROVIDER = 'MEMORY'))");
+    snowflake().verified_stmt("CREATE OR REPLACE EXTERNAL VOLUME file STORAGE_LOCATIONS = ((NAME = 'file' STORAGE_PROVIDER = 'FILE' STORAGE_BASE_URL = '/home/user/'))");
+
+    let err = snowflake()
+        .parse_sql_statements("CREATE EXTERNAL VOLUME name NAME")
+        .unwrap_err()
+        .to_string();
+    assert!(
+        err.contains("STORAGE_LOCATIONS is required for CREATE EXTERNAL VOLUME"),
+        "Unexpected error: {err}"
+    );
+}
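
Usage sketch (editor's note, not part of the patch): assuming the diff above is applied to the sqlparser crate, the snippet below parses one of the CREATE EXTERNAL VOLUME forms covered by test_external_volume using the crate's existing public API (Parser::parse_sql with SnowflakeDialect), inspects the new Statement::CreateExternalVolume variant, and checks the Display round-trip that verified_stmt relies on. The SQL literal is copied from the test; everything else is illustrative.

use sqlparser::ast::Statement;
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // Statement form taken from test_external_volume above.
    let sql = "CREATE OR REPLACE EXTERNAL VOLUME mem STORAGE_LOCATIONS = ((NAME = 'mem' STORAGE_PROVIDER = 'MEMORY'))";

    let statements = Parser::parse_sql(&SnowflakeDialect {}, sql).expect("valid Snowflake SQL");

    // The new variant exposes the parsed storage locations directly.
    if let Statement::CreateExternalVolume { name, storage_locations, .. } = &statements[0] {
        assert_eq!(name.to_string(), "mem");
        assert_eq!(storage_locations[0].provider, "MEMORY");
    }

    // Display produces the canonical text again, which is what verified_stmt asserts.
    assert_eq!(statements[0].to_string(), sql);
}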