Skip to content

Commit 4b60866

Browse files
lovasoa and jmhain authored
add support for custom operators in postgres (#1302)
Co-authored-by: Joey Hain <[email protected]>
1 parent 2fb919d commit 4b60866

File tree

7 files changed

+203
-46
lines changed

7 files changed

+203
-46
lines changed

src/ast/operator.rs

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -111,7 +111,7 @@ pub enum BinaryOperator {
111111
DuckIntegerDivide,
112112
/// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division
113113
MyIntegerDivide,
114-
/// Support for custom operators (built by parsers outside this crate)
114+
/// Support for custom operators (such as Postgres custom operators)
115115
Custom(String),
116116
/// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific)
117117
PGBitwiseXor,

src/dialect/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -122,6 +122,12 @@ pub trait Dialect: Debug + Any {
122122
fn is_identifier_start(&self, ch: char) -> bool;
123123
/// Determine if a character is a valid unquoted identifier character
124124
fn is_identifier_part(&self, ch: char) -> bool;
125+
126+
/// Most dialects do not have custom operators. Override this method to provide custom operators.
127+
fn is_custom_operator_part(&self, _ch: char) -> bool {
128+
false
129+
}
130+
125131
/// Determine if the dialect supports escaping characters via '\' in string literals.
126132
///
127133
/// Some dialects like BigQuery and Snowflake support this while others like

src/dialect/postgresql.rs

Lines changed: 27 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -25,6 +25,10 @@ impl Dialect for PostgreSqlDialect {
2525
Some('"')
2626
}
2727

28+
fn is_delimited_identifier_start(&self, ch: char) -> bool {
29+
ch == '"' // Postgres does not support backticks to quote identifiers
30+
}
31+
2832
fn is_identifier_start(&self, ch: char) -> bool {
2933
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
3034
// We don't yet support identifiers beginning with "letters with
@@ -36,6 +40,29 @@ impl Dialect for PostgreSqlDialect {
3640
ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
3741
}
3842

43+
/// See <https://www.postgresql.org/docs/current/sql-createoperator.html>
44+
fn is_custom_operator_part(&self, ch: char) -> bool {
45+
matches!(
46+
ch,
47+
'+' | '-'
48+
| '*'
49+
| '/'
50+
| '<'
51+
| '>'
52+
| '='
53+
| '~'
54+
| '!'
55+
| '@'
56+
| '#'
57+
| '%'
58+
| '^'
59+
| '&'
60+
| '|'
61+
| '`'
62+
| '?'
63+
)
64+
}
65+
3966
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
4067
if parser.parse_keyword(Keyword::COMMENT) {
4168
Some(parse_comment(parser))

src/parser/mod.rs

Lines changed: 5 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -2344,9 +2344,8 @@ impl<'a> Parser<'a> {
23442344
return infix;
23452345
}
23462346

2347-
let tok = self.next_token();
2348-
2349-
let regular_binary_operator = match &tok.token {
2347+
let mut tok = self.next_token();
2348+
let regular_binary_operator = match &mut tok.token {
23502349
Token::Spaceship => Some(BinaryOperator::Spaceship),
23512350
Token::DoubleEq => Some(BinaryOperator::Eq),
23522351
Token::Eq => Some(BinaryOperator::Eq),
@@ -2410,6 +2409,7 @@ impl<'a> Parser<'a> {
24102409
Token::Question => Some(BinaryOperator::Question),
24112410
Token::QuestionAnd => Some(BinaryOperator::QuestionAnd),
24122411
Token::QuestionPipe => Some(BinaryOperator::QuestionPipe),
2412+
Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))),
24132413

24142414
Token::Word(w) => match w.keyword {
24152415
Keyword::AND => Some(BinaryOperator::And),
@@ -2964,7 +2964,8 @@ impl<'a> Parser<'a> {
29642964
| Token::AtAt
29652965
| Token::Question
29662966
| Token::QuestionAnd
2967-
| Token::QuestionPipe => Ok(Self::PG_OTHER_PREC),
2967+
| Token::QuestionPipe
2968+
| Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC),
29682969
_ => Ok(0),
29692970
}
29702971
}

src/tokenizer.rs

Lines changed: 71 additions & 32 deletions
Original file line number · Diff line number · Diff line change
@@ -231,6 +231,10 @@ pub enum Token {
231231
/// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level
232232
/// keys within the jsonb object
233233
QuestionPipe,
234+
/// Custom binary operator
235+
/// This is used to represent any custom binary operator that is not part of the SQL standard.
236+
/// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
237+
CustomBinaryOperator(String),
234238
}
235239

236240
impl fmt::Display for Token {
@@ -320,6 +324,7 @@ impl fmt::Display for Token {
320324
Token::Question => write!(f, "?"),
321325
Token::QuestionAnd => write!(f, "?&"),
322326
Token::QuestionPipe => write!(f, "?|"),
327+
Token::CustomBinaryOperator(s) => f.write_str(s),
323328
}
324329
}
325330
}
@@ -961,15 +966,12 @@ impl<'a> Tokenizer<'a> {
961966
Some('>') => {
962967
chars.next();
963968
match chars.peek() {
964-
Some('>') => {
965-
chars.next();
966-
Ok(Some(Token::LongArrow))
967-
}
968-
_ => Ok(Some(Token::Arrow)),
969+
Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow),
970+
_ => self.start_binop(chars, "->", Token::Arrow),
969971
}
970972
}
971973
// a regular '-' operator
972-
_ => Ok(Some(Token::Minus)),
974+
_ => self.start_binop(chars, "-", Token::Minus),
973975
}
974976
}
975977
'/' => {
@@ -999,26 +1001,28 @@ impl<'a> Tokenizer<'a> {
9991001
'%' => {
10001002
chars.next(); // advance past '%'
10011003
match chars.peek() {
1002-
Some(' ') => Ok(Some(Token::Mod)),
1004+
Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)),
10031005
Some(sch) if self.dialect.is_identifier_start('%') => {
10041006
self.tokenize_identifier_or_keyword([ch, *sch], chars)
10051007
}
1006-
_ => Ok(Some(Token::Mod)),
1008+
_ => self.start_binop(chars, "%", Token::Mod),
10071009
}
10081010
}
10091011
'|' => {
10101012
chars.next(); // consume the '|'
10111013
match chars.peek() {
1012-
Some('/') => self.consume_and_return(chars, Token::PGSquareRoot),
1014+
Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot),
10131015
Some('|') => {
10141016
chars.next(); // consume the second '|'
10151017
match chars.peek() {
1016-
Some('/') => self.consume_and_return(chars, Token::PGCubeRoot),
1017-
_ => Ok(Some(Token::StringConcat)),
1018+
Some('/') => {
1019+
self.consume_for_binop(chars, "||/", Token::PGCubeRoot)
1020+
}
1021+
_ => self.start_binop(chars, "||", Token::StringConcat),
10181022
}
10191023
}
10201024
// Bitshift '|' operator
1021-
_ => Ok(Some(Token::Pipe)),
1025+
_ => self.start_binop(chars, "|", Token::Pipe),
10221026
}
10231027
}
10241028
'=' => {
@@ -1061,22 +1065,22 @@ impl<'a> Tokenizer<'a> {
10611065
Some('=') => {
10621066
chars.next();
10631067
match chars.peek() {
1064-
Some('>') => self.consume_and_return(chars, Token::Spaceship),
1065-
_ => Ok(Some(Token::LtEq)),
1068+
Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
1069+
_ => self.start_binop(chars, "<=", Token::LtEq),
10661070
}
10671071
}
1068-
Some('>') => self.consume_and_return(chars, Token::Neq),
1069-
Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
1070-
Some('@') => self.consume_and_return(chars, Token::ArrowAt),
1071-
_ => Ok(Some(Token::Lt)),
1072+
Some('>') => self.consume_for_binop(chars, "<>", Token::Neq),
1073+
Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
1074+
Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt),
1075+
_ => self.start_binop(chars, "<", Token::Lt),
10721076
}
10731077
}
10741078
'>' => {
10751079
chars.next(); // consume
10761080
match chars.peek() {
1077-
Some('=') => self.consume_and_return(chars, Token::GtEq),
1078-
Some('>') => self.consume_and_return(chars, Token::ShiftRight),
1079-
_ => Ok(Some(Token::Gt)),
1081+
Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq),
1082+
Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight),
1083+
_ => self.start_binop(chars, ">", Token::Gt),
10801084
}
10811085
}
10821086
':' => {
@@ -1094,9 +1098,12 @@ impl<'a> Tokenizer<'a> {
10941098
'&' => {
10951099
chars.next(); // consume the '&'
10961100
match chars.peek() {
1097-
Some('&') => self.consume_and_return(chars, Token::Overlap),
1101+
Some('&') => {
1102+
chars.next(); // consume the second '&'
1103+
self.start_binop(chars, "&&", Token::Overlap)
1104+
}
10981105
// Bitshift '&' operator
1099-
_ => Ok(Some(Token::Ampersand)),
1106+
_ => self.start_binop(chars, "&", Token::Ampersand),
11001107
}
11011108
}
11021109
'^' => {
@@ -1119,38 +1126,37 @@ impl<'a> Tokenizer<'a> {
11191126
'~' => {
11201127
chars.next(); // consume
11211128
match chars.peek() {
1122-
Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
1129+
Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk),
11231130
Some('~') => {
11241131
chars.next();
11251132
match chars.peek() {
11261133
Some('*') => {
1127-
self.consume_and_return(chars, Token::DoubleTildeAsterisk)
1134+
self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk)
11281135
}
1129-
_ => Ok(Some(Token::DoubleTilde)),
1136+
_ => self.start_binop(chars, "~~", Token::DoubleTilde),
11301137
}
11311138
}
1132-
_ => Ok(Some(Token::Tilde)),
1139+
_ => self.start_binop(chars, "~", Token::Tilde),
11331140
}
11341141
}
11351142
'#' => {
11361143
chars.next();
11371144
match chars.peek() {
1138-
Some('-') => self.consume_and_return(chars, Token::HashMinus),
1145+
Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus),
11391146
Some('>') => {
11401147
chars.next();
11411148
match chars.peek() {
11421149
Some('>') => {
1143-
chars.next();
1144-
Ok(Some(Token::HashLongArrow))
1150+
self.consume_for_binop(chars, "#>>", Token::HashLongArrow)
11451151
}
1146-
_ => Ok(Some(Token::HashArrow)),
1152+
_ => self.start_binop(chars, "#>", Token::HashArrow),
11471153
}
11481154
}
11491155
Some(' ') => Ok(Some(Token::Sharp)),
11501156
Some(sch) if self.dialect.is_identifier_start('#') => {
11511157
self.tokenize_identifier_or_keyword([ch, *sch], chars)
11521158
}
1153-
_ => Ok(Some(Token::Sharp)),
1159+
_ => self.start_binop(chars, "#", Token::Sharp),
11541160
}
11551161
}
11561162
'@' => {
@@ -1206,6 +1212,39 @@ impl<'a> Tokenizer<'a> {
12061212
}
12071213
}
12081214

1215+
/// Consume the next character, then parse a custom binary operator. The next character should be included in the prefix
1216+
fn consume_for_binop(
1217+
&self,
1218+
chars: &mut State,
1219+
prefix: &str,
1220+
default: Token,
1221+
) -> Result<Option<Token>, TokenizerError> {
1222+
chars.next(); // consume the first char
1223+
self.start_binop(chars, prefix, default)
1224+
}
1225+
1226+
/// parse a custom binary operator
1227+
fn start_binop(
1228+
&self,
1229+
chars: &mut State,
1230+
prefix: &str,
1231+
default: Token,
1232+
) -> Result<Option<Token>, TokenizerError> {
1233+
let mut custom = None;
1234+
while let Some(&ch) = chars.peek() {
1235+
if !self.dialect.is_custom_operator_part(ch) {
1236+
break;
1237+
}
1238+
1239+
custom.get_or_insert_with(|| prefix.to_string()).push(ch);
1240+
chars.next();
1241+
}
1242+
1243+
Ok(Some(
1244+
custom.map(Token::CustomBinaryOperator).unwrap_or(default),
1245+
))
1246+
}
1247+
12091248
/// Tokenize dollar preceded value (i.e: a string/placeholder)
12101249
fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
12111250
let mut s = String::new();

tests/sqlparser_mssql.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,12 @@ fn parse_for_json_expect_ast() {
437437
);
438438
}
439439

440+
#[test]
441+
fn parse_ampersand_arobase() {
442+
// In SQL Server, a&@b means (a) & (@b), in PostgreSQL it means (a) &@ (b)
443+
ms().expr_parses_to("a&@b", "a & @b");
444+
}
445+
440446
#[test]
441447
fn parse_cast_varchar_max() {
442448
ms_and_generic().verified_expr("CAST('foo' AS VARCHAR(MAX))");

0 commit comments

Comments (0)