From 1678c36977ecadadb0c8fb956667801628d43190 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 24 Jul 2025 20:07:31 +0800 Subject: [PATCH 1/6] feat: support export data for bigquery --- src/ast/mod.rs | 24 ++++++++++++++++++++++++ src/ast/spans.rs | 5 ++++- src/parser/mod.rs | 12 ++++++++++++ tests/sqlparser_bigquery.rs | 12 ++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1798223f3..afefb8b30 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4355,6 +4355,15 @@ pub enum Statement { /// /// See [ReturnStatement] Return(ReturnStatement), + + /// Export data statement + /// + /// Example: + /// ```sql + /// EXPORT DATA OPTIONS(uri='gs://bucket/folder/*', format='PARQUET', overwrite=true) AS + /// SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10 + /// ``` + ExportData(ExportData), } /// ```sql @@ -6193,6 +6202,7 @@ impl fmt::Display for Statement { Statement::Return(r) => write!(f, "{r}"), Statement::List(command) => write!(f, "LIST {command}"), Statement::Remove(command) => write!(f, "REMOVE {command}"), + Statement::ExportData(e) => write!(f, "{e}"), } } } @@ -10125,6 +10135,20 @@ impl fmt::Display for MemberOf { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ExportData { + pub options: Vec, + pub query: Box, +} + +impl fmt::Display for ExportData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "EXPORT DATA OPTIONS({}) AS {}", display_comma_separated(&self.options), self.query) + } +} + #[cfg(test)] mod tests { use crate::tokenizer::Location; diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 3e82905e1..1ef3567ad 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::ast::{query::SelectItemQualifiedWildcardKind, ColumnOptions}; +use crate::ast::{query::SelectItemQualifiedWildcardKind, ColumnOptions, ExportData}; use core::iter; use crate::tokenizer::Span; @@ -531,6 +531,9 @@ impl Spanned for Statement { Statement::Print { .. } => Span::empty(), Statement::Return { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) => Span::empty(), + Statement::ExportData(ExportData { options, query }) => union_spans( + options.iter().map(|i| i.span()).chain(core::iter::once(query.span())) + ), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8d5a55da0..dc2bea267 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -640,6 +640,7 @@ impl<'a> Parser<'a> { Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(), Keyword::PRINT => self.parse_print(), Keyword::RETURN => self.parse_return(), + Keyword::EXPORT => self.parse_export(), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -16472,6 +16473,17 @@ impl<'a> Parser<'a> { } } + fn parse_export(&mut self) -> Result { + self.expect_keyword(Keyword::DATA)?; + self.expect_keyword(Keyword::OPTIONS)?; + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(|p| p.parse_sql_option())?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::AS)?; + let query = self.parse_query()?; + Ok(Statement::ExportData(ExportData { options, query })) + } + /// Consume the parser and return its underlying token buffer pub fn into_tokens(self) -> Vec { self.tokens diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 2ba54d3e1..981647e37 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2566,3 +2566,15 @@ fn test_struct_trailing_and_nested_bracket() { ) ); } + +#[test] +fn test_export() { + bigquery().verified_stmt(concat!( + "EXPORT DATA OPTIONS(", + "uri = 'gs://bucket/folder/*', ", + "format = 'PARQUET', ", + "overwrite = true", + ") AS ", + "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10", + )); +} From 1786faad8f612b20887da73470c0cf3edf3adcd1 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 24 Jul 2025 20:11:16 +0800 Subject: [PATCH 2/6] fmt --- src/ast/mod.rs | 7 ++++++- src/ast/spans.rs | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6b243644d..cfbdd421c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -10150,7 +10150,12 @@ pub struct ExportData { impl fmt::Display for ExportData { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "EXPORT DATA OPTIONS({}) AS {}", display_comma_separated(&self.options), self.query) + write!( + f, + "EXPORT DATA OPTIONS({}) AS {}", + display_comma_separated(&self.options), + self.query + ) } } /// Creates a user diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 9c802d549..4c1024d52 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -532,7 +532,10 @@ impl Spanned for Statement { Statement::Return { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) => Span::empty(), Statement::ExportData(ExportData { options, query }) => union_spans( - options.iter().map(|i| i.span()).chain(core::iter::once(query.span())) + options + .iter() + .map(|i| i.span()) + .chain(core::iter::once(query.span())), ), Statement::CreateUser(..) => Span::empty(), } From 1a7e8d969ae509d99f5f9aa1136092da002a77c6 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 24 Jul 2025 20:28:15 +0800 Subject: [PATCH 3/6] with connection --- src/ast/mod.rs | 22 ++++++++++++++++------ src/ast/spans.rs | 9 +++++++-- src/parser/mod.rs | 12 +++++++++++- tests/sqlparser_bigquery.rs | 9 +++++++++ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cfbdd421c..93e663796 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -10146,16 +10146,26 @@ impl fmt::Display for MemberOf { pub struct ExportData { pub options: Vec, pub query: Box, + pub connection: Option, } impl fmt::Display for ExportData { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "EXPORT DATA OPTIONS({}) AS {}", - display_comma_separated(&self.options), - self.query - ) + if let Some(connection) = &self.connection { + write!( + f, + "EXPORT DATA WITH CONNECTION {connection} OPTIONS({}) AS {}", + display_comma_separated(&self.options), + self.query + ) + } else { + write!( + f, + "EXPORT DATA OPTIONS({}) AS {}", + display_comma_separated(&self.options), + self.query + ) + } } } /// Creates a user diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 4c1024d52..7f96465b6 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -531,11 +531,16 @@ impl Spanned for Statement { Statement::Print { .. } => Span::empty(), Statement::Return { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) => Span::empty(), - Statement::ExportData(ExportData { options, query }) => union_spans( + Statement::ExportData(ExportData { + options, + query, + connection, + }) => union_spans( options .iter() .map(|i| i.span()) - .chain(core::iter::once(query.span())), + .chain(core::iter::once(query.span())) + .chain(connection.iter().map(|i| i.span())), ), Statement::CreateUser(..) => Span::empty(), } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9895ef216..0c6f421fc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16513,13 +16513,23 @@ impl<'a> Parser<'a> { fn parse_export(&mut self) -> Result { self.expect_keyword(Keyword::DATA)?; + + let connection = if self.parse_keywords(&[Keyword::WITH, Keyword::CONNECTION]) { + Some(self.parse_object_name(false)?) + } else { + None + }; self.expect_keyword(Keyword::OPTIONS)?; self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(|p| p.parse_sql_option())?; self.expect_token(&Token::RParen)?; self.expect_keyword(Keyword::AS)?; let query = self.parse_query()?; - Ok(Statement::ExportData(ExportData { options, query })) + Ok(Statement::ExportData(ExportData { + options, + query, + connection, + })) } /// Consume the parser and return its underlying token buffer diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 981647e37..53ca0ac2a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2577,4 +2577,13 @@ fn test_export() { ") AS ", "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10", )); + + bigquery().verified_stmt(concat!( + "EXPORT DATA WITH CONNECTION myconnection.myproject.us OPTIONS(", + "uri = 'gs://bucket/folder/*', ", + "format = 'PARQUET', ", + "overwrite = true", + ") AS ", + "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10", + )); } From 84a0e12191994e66daf70afcc669eaa58c600e68 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sat, 26 Jul 2025 09:47:19 +0800 Subject: [PATCH 4/6] more tests --- src/ast/mod.rs | 1 + src/parser/mod.rs | 12 +- tests/sqlparser_bigquery.rs | 219 +++++++++++++++++++++++++++++++++++- 3 files changed, 226 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 144168521..bd464e52d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4362,6 +4362,7 @@ pub enum Statement { /// EXPORT DATA OPTIONS(uri='gs://bucket/folder/*', format='PARQUET', overwrite=true) AS /// SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10 /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/export-statements) ExportData(ExportData), /// ```sql /// CREATE [OR REPLACE] USER [IF NOT EXISTS] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5672d8091..5ea57f6f8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -645,7 +645,10 @@ impl<'a> Parser<'a> { Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(), Keyword::PRINT => self.parse_print(), Keyword::RETURN => self.parse_return(), - Keyword::EXPORT => self.parse_export(), + Keyword::EXPORT => { + self.prev_token(); + self.parse_export_data() + } _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -16524,8 +16527,11 @@ impl<'a> Parser<'a> { } } - fn parse_export(&mut self) -> Result { - self.expect_keyword(Keyword::DATA)?; + /// /// Parse a `EXPORT DATA` statement. + /// + /// See [Statement::ExportData] + fn parse_export_data(&mut self) -> Result { + self.expect_keywords(&[Keyword::EXPORT, Keyword::DATA])?; let connection = if self.parse_keywords(&[Keyword::WITH, Keyword::CONNECTION]) { Some(self.parse_object_name(false)?) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d723d39ed..d69544505 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -20,10 +20,12 @@ mod test_utils; use std::ops::Deref; +use sqlparser::ast::helpers::attached_token::AttachedToken; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; +use sqlparser::keywords::Keyword; use sqlparser::parser::{ParserError, ParserOptions}; -use sqlparser::tokenizer::{Location, Span}; +use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Word}; use test_utils::*; #[test] @@ -2569,7 +2571,7 @@ fn test_struct_trailing_and_nested_bracket() { #[test] fn test_export_data() { - bigquery().verified_stmt(concat!( + let stmt = bigquery().verified_stmt(concat!( "EXPORT DATA OPTIONS(", "uri = 'gs://bucket/folder/*', ", "format = 'PARQUET', ", @@ -2577,8 +2579,100 @@ fn test_export_data() { ") AS ", "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10", )); + assert_eq!( + stmt, + Statement::ExportData(ExportData { + options: vec![ + SqlOption::KeyValue { + key: Ident::new("uri"), + value: Expr::Value( + Value::SingleQuotedString("gs://bucket/folder/*".to_owned()) + .with_empty_span() + ), + }, + SqlOption::KeyValue { + key: Ident::new("format"), + value: Expr::Value( + Value::SingleQuotedString("PARQUET".to_owned()).with_empty_span() + ), + }, + SqlOption::KeyValue { + key: Ident::new("overwrite"), + value: Expr::Value(Value::Boolean(true).with_empty_span()), + }, + ], + connection: None, + query: Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken(TokenWithSpan::new( + Token::Word(Word { + value: "SELECT".to_string(), + quote_style: None, + keyword: Keyword::SELECT, + }), + Span::empty() + )), + distinct: None, + top: None, + top_before_distinct: false, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("field1"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("field2"))), + ], + exclude: None, + into: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![ + Ident::new("mydataset"), + Ident::new("table1") + ])), + joins: vec![], + }], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + flavor: SelectFlavor::Standard, + }))), + order_by: Some(OrderBy { + kind: OrderByKind::Expressions(vec![OrderByExpr { + expr: Expr::Identifier(Ident::new("field1")), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + },]), + interpolate: None, + }), + limit_clause: Some(LimitClause::LimitOffset { + limit: Some(Expr::Value( + Value::Number("10".to_string(), false).with_empty_span() + )), + offset: None, + limit_by: vec![], + }), + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + }) + }) + ); - bigquery().verified_stmt(concat!( + let stmt = bigquery().verified_stmt(concat!( "EXPORT DATA WITH CONNECTION myconnection.myproject.us OPTIONS(", "uri = 'gs://bucket/folder/*', ", "format = 'PARQUET', ", @@ -2586,6 +2680,125 @@ fn test_export_data() { ") AS ", "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10", )); + + assert_eq!( + stmt, + Statement::ExportData(ExportData { + options: vec![ + SqlOption::KeyValue { + key: Ident::new("uri"), + value: Expr::Value( + Value::SingleQuotedString("gs://bucket/folder/*".to_owned()) + .with_empty_span() + ), + }, + SqlOption::KeyValue { + key: Ident::new("format"), + value: Expr::Value( + Value::SingleQuotedString("PARQUET".to_owned()).with_empty_span() + ), + }, + SqlOption::KeyValue { + key: Ident::new("overwrite"), + value: Expr::Value(Value::Boolean(true).with_empty_span()), + }, + ], + connection: Some(ObjectName::from(vec![ + Ident::new("myconnection"), + Ident::new("myproject"), + Ident::new("us") + ])), + query: Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken(TokenWithSpan::new( + Token::Word(Word { + value: "SELECT".to_string(), + quote_style: None, + keyword: Keyword::SELECT, + }), + Span::empty() + )), + distinct: None, + top: None, + top_before_distinct: false, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("field1"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("field2"))), + ], + exclude: None, + into: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![ + Ident::new("mydataset"), + Ident::new("table1") + ])), + joins: vec![], + }], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + flavor: SelectFlavor::Standard, + }))), + order_by: Some(OrderBy { + kind: OrderByKind::Expressions(vec![OrderByExpr { + expr: Expr::Identifier(Ident::new("field1")), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + },]), + interpolate: None, + }), + limit_clause: Some(LimitClause::LimitOffset { + limit: Some(Expr::Value( + Value::Number("10".to_string(), false).with_empty_span() + )), + offset: None, + limit_by: vec![], + }), + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + }) + }) + ); + + // at least one option (uri) is required + let err = bigquery() + .parse_sql_statements(concat!( + "EXPORT DATA OPTIONS() AS ", + "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10", + )) + .unwrap_err(); + assert_eq!( + err.to_string(), + "sql parser error: Expected: identifier, found: )" + ); + + let err = bigquery() + .parse_sql_statements(concat!( + "EXPORT DATA AS SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10", + )) + .unwrap_err(); + assert_eq!( + err.to_string(), + "sql parser error: Expected: OPTIONS, found: AS" + ); } #[test] From 2ed8e683a4ebd30b4d0b6876d93421d968de1736 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sat, 26 Jul 2025 15:35:31 +0800 Subject: [PATCH 5/6] update --- tests/sqlparser_bigquery.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d69544505..1a0f2e1f5 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2657,7 +2657,7 @@ fn test_export_data() { }), limit_clause: Some(LimitClause::LimitOffset { limit: Some(Expr::Value( - Value::Number("10".to_string(), false).with_empty_span() + Value::Number("10".into(), false).with_empty_span() )), offset: None, limit_by: vec![], @@ -2763,7 +2763,7 @@ fn test_export_data() { }), limit_clause: Some(LimitClause::LimitOffset { limit: Some(Expr::Value( - Value::Number("10".to_string(), false).with_empty_span() + Value::Number("10".into(), false).with_empty_span() )), offset: None, limit_by: vec![], From bfcddd3f94bc9c7fe37853f2902a049b21137747 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sat, 26 Jul 2025 15:46:24 +0800 Subject: [PATCH 6/6] update --- tests/sqlparser_bigquery.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 1a0f2e1f5..10a356717 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2656,9 +2656,7 @@ fn test_export_data() { interpolate: None, }), limit_clause: Some(LimitClause::LimitOffset { - limit: Some(Expr::Value( - Value::Number("10".into(), false).with_empty_span() - )), + limit: Some(Expr::Value(number("10").with_empty_span())), offset: None, limit_by: vec![], }), @@ -2762,9 +2760,7 @@ fn test_export_data() { interpolate: None, }), limit_clause: Some(LimitClause::LimitOffset { - limit: Some(Expr::Value( - Value::Number("10".into(), false).with_empty_span() - )), + limit: Some(Expr::Value(number("10").with_empty_span())), offset: None, limit_by: vec![], }),