Skip to content

provide LISTAGG implementation #174

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
May 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6896953
provide LISTAGG implementation
maxcountryman May 29, 2020
c6216ec
provide a reminder for LISTAGG syntax
maxcountryman May 29, 2020
48d6a8b
prefer Vec<T> over Option<Vec<T>>
maxcountryman May 29, 2020
eba849a
ensure we do not explicitly print implied `ALL`
maxcountryman May 29, 2020
a95fa55
refactor ListAggOnOverflow into an enum
maxcountryman May 29, 2020
ee144b2
provide parse_all_or_distinct abstraction
maxcountryman May 29, 2020
1e41d30
clean up expr and separator parsing a bit
maxcountryman May 29, 2020
1e952b6
combine token checking into its logical block
maxcountryman May 29, 2020
5736b9d
ensure cargo fmt
maxcountryman May 29, 2020
bcc41a3
extend doc-comment with example
maxcountryman May 30, 2020
a16c3a4
eliminate unnecessary error variable
maxcountryman May 30, 2020
e1ad011
ensure filler is optional
maxcountryman May 30, 2020
158c650
prefer format over generalized abstraction
maxcountryman May 30, 2020
b2cb822
shorten with_count binding
maxcountryman May 30, 2020
b504072
rework display for listagg
maxcountryman May 30, 2020
7e53d37
ensure enum variants have proper doc-comments
maxcountryman May 30, 2020
d1ff825
add back in implementation note
maxcountryman May 30, 2020
87ac2a1
relocate comment to proper place
maxcountryman May 30, 2020
95c5312
ensure proper comment formatting
maxcountryman May 30, 2020
4e17b47
ensure backticks for formatting
maxcountryman May 30, 2020
7a25837
cleanup doc-comment
maxcountryman May 30, 2020
0596074
ensure consistent fmt implementation
maxcountryman May 30, 2020
f1f8c4a
clean up filler parsing a bit
maxcountryman May 30, 2020
5964599
clarify error context
maxcountryman May 30, 2020
ebe00d7
expand valid filler types
maxcountryman May 30, 2020
815854f
revert back to with and without checking
maxcountryman May 30, 2020
2b7d116
ensure nulls_first is provided
maxcountryman May 30, 2020
f92443b
placate cargo fmt
maxcountryman May 30, 2020
cd471e8
Update CHANGELOG
nickolay May 30, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
- Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r!
- Support basic forms of `CREATE SCHEMA` and `DROP SCHEMA` (#173) - thanks @alex-dukhno!
- Support `NULLS FIRST`/`LAST` in `ORDER BY` expressions (#176) - thanks @houqp!
- Support `LISTAGG()` (#174) - thanks @maxcountryman!

### Fixed
- Report an error for unterminated string literals (#165)
Expand Down
74 changes: 74 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ pub enum Expr {
/// A parenthesized subquery `(SELECT ...)`, used in expression like
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
Subquery(Box<Query>),
/// The `LISTAGG` function `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)`
ListAgg(ListAgg),
}

impl fmt::Display for Expr {
Expand Down Expand Up @@ -299,6 +301,7 @@ impl fmt::Display for Expr {
}
Expr::Exists(s) => write!(f, "EXISTS ({})", s),
Expr::Subquery(s) => write!(f, "({})", s),
Expr::ListAgg(listagg) => write!(f, "{}", listagg),
}
}
}
Expand Down Expand Up @@ -850,6 +853,77 @@ impl FromStr for FileFormat {
}
}

/// A `LISTAGG` invocation `LISTAGG( [ DISTINCT ] <expr>[, <separator> ] [ON OVERFLOW <on_overflow>] )
/// [ WITHIN GROUP (ORDER BY <within_group1>[, ...] ) ]`
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ListAgg {
/// `true` when the `DISTINCT` keyword was given; displayed as a `DISTINCT ` prefix.
pub distinct: bool,
/// The expression being aggregated.
pub expr: Box<Expr>,
/// The optional separator expression that follows the comma.
pub separator: Option<Box<Expr>>,
/// The optional `ON OVERFLOW` clause.
pub on_overflow: Option<ListAggOnOverflow>,
/// The `ORDER BY` expressions of the `WITHIN GROUP` clause; when empty, no
/// `WITHIN GROUP` clause is displayed.
pub within_group: Vec<OrderByExpr>,
}

impl fmt::Display for ListAgg {
    /// Renders the invocation back to SQL text: `LISTAGG(`, an optional `DISTINCT `,
    /// the aggregated expression, the optional separator and `ON OVERFLOW` clause, `)`,
    /// and finally the `WITHIN GROUP` clause when at least one ordering expression exists.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let distinct_kw = if self.distinct { "DISTINCT " } else { "" };
        write!(f, "LISTAGG({}{}", distinct_kw, self.expr)?;

        if let Some(separator) = self.separator.as_ref() {
            write!(f, ", {}", separator)?;
        }
        // The overflow clause supplies its own leading space.
        if let Some(on_overflow) = self.on_overflow.as_ref() {
            write!(f, "{}", on_overflow)?;
        }
        write!(f, ")")?;

        // An empty ordering list means the `WITHIN GROUP` clause is omitted.
        if self.within_group.is_empty() {
            return Ok(());
        }
        write!(
            f,
            " WITHIN GROUP (ORDER BY {})",
            display_comma_separated(&self.within_group)
        )
    }
}

/// The `ON OVERFLOW` clause of a LISTAGG invocation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ListAggOnOverflow {
/// `ON OVERFLOW ERROR`
Error,

/// `ON OVERFLOW TRUNCATE [ <filler> ] WITH[OUT] COUNT`
Truncate {
/// The optional filler expression written between `TRUNCATE` and `WITH[OUT] COUNT`.
filler: Option<Box<Expr>>,
/// `true` for `WITH COUNT`, `false` for `WITHOUT COUNT`.
with_count: bool,
},
}

impl fmt::Display for ListAggOnOverflow {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, " ON OVERFLOW")?;
match self {
ListAggOnOverflow::Error => write!(f, " ERROR"),
ListAggOnOverflow::Truncate { filler, with_count } => {
write!(f, " TRUNCATE")?;
if let Some(filler) = filler {
write!(f, " {}", filler)?;
}
if *with_count {
write!(f, " WITH")?;
} else {
write!(f, " WITHOUT")?;
}
write!(f, " COUNT")
}
}
}
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ObjectType {
Table,
Expand Down
3 changes: 3 additions & 0 deletions src/dialect/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ define_keywords!(
END_FRAME,
END_PARTITION,
EQUALS,
ERROR,
ESCAPE,
EVERY,
EXCEPT,
Expand Down Expand Up @@ -230,6 +231,7 @@ define_keywords!(
LIKE,
LIKE_REGEX,
LIMIT,
LISTAGG,
LN,
LOCAL,
LOCALTIME,
Expand Down Expand Up @@ -279,6 +281,7 @@ define_keywords!(
OUT,
OUTER,
OVER,
OVERFLOW,
OVERLAPS,
OVERLAY,
PARAMETER,
Expand Down
88 changes: 75 additions & 13 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ impl Parser {
"EXISTS" => self.parse_exists_expr(),
"EXTRACT" => self.parse_extract_expr(),
"INTERVAL" => self.parse_literal_interval(),
"LISTAGG" => self.parse_listagg_expr(),
"NOT" => Ok(Expr::UnaryOp {
op: UnaryOperator::Not,
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
Expand Down Expand Up @@ -272,14 +273,7 @@ impl Parser {

pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
let all = self.parse_keyword("ALL");
let distinct = self.parse_keyword("DISTINCT");
if all && distinct {
return parser_err!(format!(
"Cannot specify both ALL and DISTINCT in function: {}",
name.to_string(),
));
}
let distinct = self.parse_all_or_distinct()?;
let args = self.parse_optional_args()?;
let over = if self.parse_keyword("OVER") {
// TBD: support window names (`OVER mywin`) in place of inline specification
Expand Down Expand Up @@ -423,6 +417,66 @@ impl Parser {
})
}

/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
///
/// Starts at the opening parenthesis of the argument list and returns an `Expr::ListAgg`
/// built from the parsed `DISTINCT` flag, expression, optional separator, optional
/// `ON OVERFLOW` clause and optional `WITHIN GROUP` ordering. Any unexpected token is
/// reported as a `ParserError`.
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
let distinct = self.parse_all_or_distinct()?;
let expr = Box::new(self.parse_expr()?);
// While ANSI SQL would require the separator, Redshift makes this optional. Here we
// choose to make the separator optional as this provides the more general implementation.
let separator = if self.consume_token(&Token::Comma) {
Some(Box::new(self.parse_expr()?))
} else {
None
};
// Optional `ON OVERFLOW { ERROR | TRUNCATE [ <filler> ] WITH[OUT] COUNT }` clause.
let on_overflow = if self.parse_keywords(vec!["ON", "OVERFLOW"]) {
if self.parse_keyword("ERROR") {
Some(ListAggOnOverflow::Error)
} else {
self.expect_keyword("TRUNCATE")?;
// The filler is optional: peek to decide between "no filler" (next token is
// already `WITH`/`WITHOUT`), a string-literal filler, or an error.
let filler = match self.peek_token() {
Some(Token::Word(kw)) if kw.keyword == "WITH" || kw.keyword == "WITHOUT" => {
None
}
Some(Token::SingleQuotedString(_))
| Some(Token::NationalStringLiteral(_))
| Some(Token::HexStringLiteral(_)) => Some(Box::new(self.parse_expr()?)),
Comment on lines +441 to +443
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allows for any supported string literal to be parsed as a filler and uses parse_expr to handle this ergonomically. (There may be a more precise way of handling this, but I wasn't immediately able to find it.)

Also it might be a good idea to be stricter with the separator parsing, but I'll defer to your feedback.

_ => self.expected(
"either filler, WITH, or WITHOUT in LISTAGG",
self.peek_token(),
)?,
};
// `with_count` is true for `WITH`; otherwise `WITHOUT` must be present.
let with_count = self.parse_keyword("WITH");
if !with_count && !self.parse_keyword("WITHOUT") {
self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?;
}
self.expect_keyword("COUNT")?;
Some(ListAggOnOverflow::Truncate { filler, with_count })
}
} else {
None
};
self.expect_token(&Token::RParen)?;
// Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the
// more general implementation.
let within_group = if self.parse_keywords(vec!["WITHIN", "GROUP"]) {
self.expect_token(&Token::LParen)?;
self.expect_keywords(&["ORDER", "BY"])?;
let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?;
self.expect_token(&Token::RParen)?;
order_by_expr
} else {
// No `WITHIN GROUP` clause: represented as an empty ordering list.
vec![]
};
Ok(Expr::ListAgg(ListAgg {
distinct,
expr,
separator,
on_overflow,
within_group,
}))
}

// This function parses date/time fields for both the EXTRACT function-like
// operator and interval qualifiers. EXTRACT supports a wider set of
// date/time fields than interval qualifiers, so this function may need to
Expand Down Expand Up @@ -851,6 +905,18 @@ impl Parser {
Ok(values)
}

/// Parse an optional `ALL` or `DISTINCT` quantifier.
///
/// Returns `Ok(true)` if `DISTINCT` was parsed and `Ok(false)` otherwise (either `ALL`
/// or no quantifier at all). Returns a `ParserError` if `ALL` is immediately followed
/// by `DISTINCT`, since the two are mutually exclusive.
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
    let all = self.parse_keyword("ALL");
    let distinct = self.parse_keyword("DISTINCT");
    // Both branches are the function's tail expression, so no explicit `return` is needed.
    if all && distinct {
        parser_err!("Cannot specify both ALL and DISTINCT".to_string())
    } else {
        Ok(distinct)
    }
}

/// Parse a SQL CREATE statement
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
if self.parse_keyword("TABLE") {
Expand Down Expand Up @@ -1635,11 +1701,7 @@ impl Parser {
/// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`),
/// assuming the initial `SELECT` was already consumed
pub fn parse_select(&mut self) -> Result<Select, ParserError> {
let all = self.parse_keyword("ALL");
let distinct = self.parse_keyword("DISTINCT");
if all && distinct {
return parser_err!("Cannot specify both ALL and DISTINCT in SELECT");
}
let distinct = self.parse_all_or_distinct()?;

let top = if self.parse_keyword("TOP") {
Some(self.parse_top()?)
Expand Down
58 changes: 54 additions & 4 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ fn parse_select_all() {
fn parse_select_all_distinct() {
let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer");
assert_eq!(
ParserError::ParserError("Cannot specify both ALL and DISTINCT in SELECT".to_string()),
ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
result.unwrap_err(),
);
}
Expand Down Expand Up @@ -357,9 +357,7 @@ fn parse_select_count_distinct() {
let sql = "SELECT COUNT(ALL DISTINCT + x) FROM customer";
let res = parse_sql_statements(sql);
assert_eq!(
ParserError::ParserError(
"Cannot specify both ALL and DISTINCT in function: COUNT".to_string()
),
ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
res.unwrap_err()
);
}
Expand Down Expand Up @@ -914,6 +912,58 @@ fn parse_extract() {
);
}

#[test]
fn parse_listagg() {
    // The fully-featured form is checked both for round-tripping and for its exact AST.
    let sql = "SELECT LISTAGG(DISTINCT dateid, ', ' ON OVERFLOW TRUNCATE '%' WITHOUT COUNT) \
               WITHIN GROUP (ORDER BY id, username)";
    let select = verified_only_select(sql);

    // The remaining forms only need to round-trip through parse + display.
    let round_trip_cases = [
        "SELECT LISTAGG(sellerid) WITHIN GROUP (ORDER BY dateid)",
        "SELECT LISTAGG(dateid)",
        "SELECT LISTAGG(DISTINCT dateid)",
        "SELECT LISTAGG(dateid ON OVERFLOW ERROR)",
        "SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE N'...' WITH COUNT)",
        "SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE X'deadbeef' WITH COUNT)",
    ];
    for case in round_trip_cases.iter() {
        verified_stmt(*case);
    }

    // Builds a plain `ORDER BY <name>` entry with no direction or NULLS ordering.
    let order_by = |name: &str| OrderByExpr {
        expr: Expr::Identifier(Ident {
            value: name.to_string(),
            quote_style: None,
        }),
        asc: None,
        nulls_first: None,
    };
    let string_lit = |s: &str| Box::new(Expr::Value(Value::SingleQuotedString(s.to_string())));

    let expected = Expr::ListAgg(ListAgg {
        distinct: true,
        expr: Box::new(Expr::Identifier(Ident::new("dateid"))),
        separator: Some(string_lit(", ")),
        on_overflow: Some(ListAggOnOverflow::Truncate {
            filler: Some(string_lit("%")),
            with_count: false,
        }),
        within_group: vec![order_by("id"), order_by("username")],
    });
    assert_eq!(&expected, expr_from_projection(only(&select.projection)));
}

#[test]
fn parse_create_table() {
let sql = "CREATE TABLE uk_cities (\
Expand Down