diff --git a/CHANGELOG.md b/CHANGELOG.md index d39c76cdb..60c25da18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented - Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r! - Support basic forms of `CREATE SCHEMA` and `DROP SCHEMA` (#173) - thanks @alex-dukhno! - Support `NULLS FIRST`/`LAST` in `ORDER BY` expressions (#176) - thanks @houqp! +- Support `LISTAGG()` (#174) - thanks @maxcountryman! ### Fixed - Report an error for unterminated string literals (#165) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a867abcf9..2dbf42b29 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -224,6 +224,8 @@ pub enum Expr { /// A parenthesized subquery `(SELECT ...)`, used in expression like /// `SELECT (subquery) AS x` or `WHERE (subquery) = x` Subquery(Box), + /// The `LISTAGG` function `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)` + ListAgg(ListAgg), } impl fmt::Display for Expr { @@ -299,6 +301,7 @@ impl fmt::Display for Expr { } Expr::Exists(s) => write!(f, "EXISTS ({})", s), Expr::Subquery(s) => write!(f, "({})", s), + Expr::ListAgg(listagg) => write!(f, "{}", listagg), } } } @@ -850,6 +853,77 @@ impl FromStr for FileFormat { } } +/// A `LISTAGG` invocation `LISTAGG( [ DISTINCT ] <expr>[, <separator> ] [ON OVERFLOW <on_overflow>] ) ) +/// [ WITHIN GROUP (ORDER BY <within_group1>[, ...]
) ]` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ListAgg { + pub distinct: bool, + pub expr: Box, + pub separator: Option>, + pub on_overflow: Option, + pub within_group: Vec, +} + +impl fmt::Display for ListAgg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "LISTAGG({}{}", + if self.distinct { "DISTINCT " } else { "" }, + self.expr + )?; + if let Some(separator) = &self.separator { + write!(f, ", {}", separator)?; + } + if let Some(on_overflow) = &self.on_overflow { + write!(f, "{}", on_overflow)?; + } + write!(f, ")")?; + if !self.within_group.is_empty() { + write!( + f, + " WITHIN GROUP (ORDER BY {})", + display_comma_separated(&self.within_group) + )?; + } + Ok(()) + } +} + +/// The `ON OVERFLOW` clause of a LISTAGG invocation +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ListAggOnOverflow { + /// `ON OVERFLOW ERROR` + Error, + + /// `ON OVERFLOW TRUNCATE [ <filler> ] WITH[OUT] COUNT` + Truncate { + filler: Option>, + with_count: bool, + }, +} + +impl fmt::Display for ListAggOnOverflow { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, " ON OVERFLOW")?; + match self { + ListAggOnOverflow::Error => write!(f, " ERROR"), + ListAggOnOverflow::Truncate { filler, with_count } => { + write!(f, " TRUNCATE")?; + if let Some(filler) = filler { + write!(f, " {}", filler)?; + } + if *with_count { + write!(f, " WITH")?; + } else { + write!(f, " WITHOUT")?; + } + write!(f, " COUNT") + } + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ObjectType { Table, diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index a01871c6e..ee59a1c92 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -161,6 +161,7 @@ define_keywords!( END_FRAME, END_PARTITION, EQUALS, + ERROR, ESCAPE, EVERY, EXCEPT, @@ -230,6 +231,7 @@ define_keywords!( LIKE, LIKE_REGEX, LIMIT, + LISTAGG, LN, LOCAL, LOCALTIME, @@ -279,6 +281,7 @@ define_keywords!( OUT, OUTER, OVER, + OVERFLOW, OVERLAPS, OVERLAY,
PARAMETER, diff --git a/src/parser.rs b/src/parser.rs index 608ac4736..c0345736f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -191,6 +191,7 @@ impl Parser { "EXISTS" => self.parse_exists_expr(), "EXTRACT" => self.parse_extract_expr(), "INTERVAL" => self.parse_literal_interval(), + "LISTAGG" => self.parse_listagg_expr(), "NOT" => Ok(Expr::UnaryOp { op: UnaryOperator::Not, expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), @@ -272,14 +273,7 @@ impl Parser { pub fn parse_function(&mut self, name: ObjectName) -> Result { self.expect_token(&Token::LParen)?; - let all = self.parse_keyword("ALL"); - let distinct = self.parse_keyword("DISTINCT"); - if all && distinct { - return parser_err!(format!( - "Cannot specify both ALL and DISTINCT in function: {}", - name.to_string(), - )); - } + let distinct = self.parse_all_or_distinct()?; let args = self.parse_optional_args()?; let over = if self.parse_keyword("OVER") { // TBD: support window names (`OVER mywin`) in place of inline specification @@ -423,6 +417,66 @@ impl Parser { }) } + /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. + pub fn parse_listagg_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let distinct = self.parse_all_or_distinct()?; + let expr = Box::new(self.parse_expr()?); + // While ANSI SQL would require the separator, Redshift makes this optional. Here we + // choose to make the separator optional as this provides the more general implementation.
+ let separator = if self.consume_token(&Token::Comma) { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + let on_overflow = if self.parse_keywords(vec!["ON", "OVERFLOW"]) { + if self.parse_keyword("ERROR") { + Some(ListAggOnOverflow::Error) + } else { + self.expect_keyword("TRUNCATE")?; + let filler = match self.peek_token() { + Some(Token::Word(kw)) if kw.keyword == "WITH" || kw.keyword == "WITHOUT" => { + None + } + Some(Token::SingleQuotedString(_)) + | Some(Token::NationalStringLiteral(_)) + | Some(Token::HexStringLiteral(_)) => Some(Box::new(self.parse_expr()?)), + _ => self.expected( + "either filler, WITH, or WITHOUT in LISTAGG", + self.peek_token(), + )?, + }; + let with_count = self.parse_keyword("WITH"); + if !with_count && !self.parse_keyword("WITHOUT") { + self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; + } + self.expect_keyword("COUNT")?; + Some(ListAggOnOverflow::Truncate { filler, with_count }) + } + } else { + None + }; + self.expect_token(&Token::RParen)?; + // Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the + // more general implementation. + let within_group = if self.parse_keywords(vec!["WITHIN", "GROUP"]) { + self.expect_token(&Token::LParen)?; + self.expect_keywords(&["ORDER", "BY"])?; + let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + order_by_expr + } else { + vec![] + }; + Ok(Expr::ListAgg(ListAgg { + distinct, + expr, + separator, + on_overflow, + within_group, + })) + } + // This function parses date/time fields for both the EXTRACT function-like // operator and interval qualifiers. EXTRACT supports a wider set of // date/time fields than interval qualifiers, so this function may need to @@ -851,6 +905,18 @@ impl Parser { Ok(values) } + /// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a + /// `ParserError` if both `ALL` and `DISTINCT` are found.
+ pub fn parse_all_or_distinct(&mut self) -> Result { + let all = self.parse_keyword("ALL"); + let distinct = self.parse_keyword("DISTINCT"); + if all && distinct { + return parser_err!("Cannot specify both ALL and DISTINCT".to_string()); + } else { + Ok(distinct) + } + } + /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { if self.parse_keyword("TABLE") { @@ -1635,11 +1701,7 @@ impl Parser { /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result { - let all = self.parse_keyword("ALL"); - let distinct = self.parse_keyword("DISTINCT"); - if all && distinct { - return parser_err!("Cannot specify both ALL and DISTINCT in SELECT"); - } + let distinct = self.parse_all_or_distinct()?; let top = if self.parse_keyword("TOP") { Some(self.parse_top()?) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c87fcf3a2..257b48230 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -244,7 +244,7 @@ fn parse_select_all() { fn parse_select_all_distinct() { let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer"); assert_eq!( - ParserError::ParserError("Cannot specify both ALL and DISTINCT in SELECT".to_string()), + ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()), result.unwrap_err(), ); } @@ -357,9 +357,7 @@ fn parse_select_count_distinct() { let sql = "SELECT COUNT(ALL DISTINCT + x) FROM customer"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError( - "Cannot specify both ALL and DISTINCT in function: COUNT".to_string() - ), + ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()), res.unwrap_err() ); } @@ -914,6 +912,58 @@ fn parse_extract() { ); } +#[test] +fn parse_listagg() { + let sql = "SELECT LISTAGG(DISTINCT dateid, ', ' ON OVERFLOW TRUNCATE '%' WITHOUT COUNT) \ + WITHIN GROUP (ORDER BY id, 
username)"; + let select = verified_only_select(sql); + + verified_stmt("SELECT LISTAGG(sellerid) WITHIN GROUP (ORDER BY dateid)"); + verified_stmt("SELECT LISTAGG(dateid)"); + verified_stmt("SELECT LISTAGG(DISTINCT dateid)"); + verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW ERROR)"); + verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE N'...' WITH COUNT)"); + verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE X'deadbeef' WITH COUNT)"); + + let expr = Box::new(Expr::Identifier(Ident::new("dateid"))); + let on_overflow = Some(ListAggOnOverflow::Truncate { + filler: Some(Box::new(Expr::Value(Value::SingleQuotedString( + "%".to_string(), + )))), + with_count: false, + }); + let within_group = vec![ + OrderByExpr { + expr: Expr::Identifier(Ident { + value: "id".to_string(), + quote_style: None, + }), + asc: None, + nulls_first: None, + }, + OrderByExpr { + expr: Expr::Identifier(Ident { + value: "username".to_string(), + quote_style: None, + }), + asc: None, + nulls_first: None, + }, + ]; + assert_eq!( + &Expr::ListAgg(ListAgg { + distinct: true, + expr, + separator: Some(Box::new(Expr::Value(Value::SingleQuotedString( + ", ".to_string() + )))), + on_overflow, + within_group + }), + expr_from_projection(only(&select.projection)) + ); +} + #[test] fn parse_create_table() { let sql = "CREATE TABLE uk_cities (\