Skip to content

add support for custom operators in postgres #1302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ast/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ pub enum BinaryOperator {
DuckIntegerDivide,
/// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division
MyIntegerDivide,
/// Support for custom operators (built by parsers outside this crate)
/// Support for custom operators (such as Postgres custom operators)
Custom(String),
/// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific)
PGBitwiseXor,
Expand Down
6 changes: 6 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ pub trait Dialect: Debug + Any {
fn is_identifier_start(&self, ch: char) -> bool;
/// Determine if a character is a valid unquoted identifier character
fn is_identifier_part(&self, ch: char) -> bool;

/// Returns `true` if `ch` may appear inside a dialect-specific custom
/// operator (e.g. one defined via PostgreSQL's `CREATE OPERATOR`).
///
/// Most dialects do not have custom operators, so the default accepts no
/// characters. Override this method to provide custom operators.
fn is_custom_operator_part(&self, _ch: char) -> bool {
    false
}

/// Determine if the dialect supports escaping characters via '\' in string literals.
///
/// Some dialects like BigQuery and Snowflake support this while others like
Expand Down
27 changes: 27 additions & 0 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ impl Dialect for PostgreSqlDialect {
Some('"')
}

/// A delimited identifier in PostgreSQL starts with a double quote only;
/// MySQL-style backtick quoting is not recognized.
fn is_delimited_identifier_start(&self, ch: char) -> bool {
    matches!(ch, '"')
}

fn is_identifier_start(&self, ch: char) -> bool {
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
// We don't yet support identifiers beginning with "letters with
Expand All @@ -36,6 +40,29 @@ impl Dialect for PostgreSqlDialect {
ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
}

/// Reports whether `ch` is allowed in a user-defined operator name.
///
/// PostgreSQL restricts `CREATE OPERATOR` names to this fixed character
/// set; see <https://www.postgresql.org/docs/current/sql-createoperator.html>
fn is_custom_operator_part(&self, ch: char) -> bool {
    // The complete set of characters PostgreSQL permits in operator names.
    "+-*/<>=~!@#%^&|`?".contains(ch)
}

fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
if parser.parse_keyword(Keyword::COMMENT) {
Some(parse_comment(parser))
Expand Down
9 changes: 5 additions & 4 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2300,9 +2300,8 @@ impl<'a> Parser<'a> {
return infix;
}

let tok = self.next_token();

let regular_binary_operator = match &tok.token {
let mut tok = self.next_token();
let regular_binary_operator = match &mut tok.token {
Token::Spaceship => Some(BinaryOperator::Spaceship),
Token::DoubleEq => Some(BinaryOperator::Eq),
Token::Eq => Some(BinaryOperator::Eq),
Expand Down Expand Up @@ -2366,6 +2365,7 @@ impl<'a> Parser<'a> {
Token::Question => Some(BinaryOperator::Question),
Token::QuestionAnd => Some(BinaryOperator::QuestionAnd),
Token::QuestionPipe => Some(BinaryOperator::QuestionPipe),
Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))),

Token::Word(w) => match w.keyword {
Keyword::AND => Some(BinaryOperator::And),
Expand Down Expand Up @@ -2920,7 +2920,8 @@ impl<'a> Parser<'a> {
| Token::AtAt
| Token::Question
| Token::QuestionAnd
| Token::QuestionPipe => Ok(Self::PG_OTHER_PREC),
| Token::QuestionPipe
| Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC),
_ => Ok(0),
}
}
Expand Down
105 changes: 73 additions & 32 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ pub enum Token {
/// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level
/// keys within the jsonb object
QuestionPipe,
/// Custom binary operator
/// This is used to represent any custom binary operator that is not part of the SQL standard.
/// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
CustomBinaryOperator(String),
}

impl fmt::Display for Token {
Expand Down Expand Up @@ -320,6 +324,7 @@ impl fmt::Display for Token {
Token::Question => write!(f, "?"),
Token::QuestionAnd => write!(f, "?&"),
Token::QuestionPipe => write!(f, "?|"),
Token::CustomBinaryOperator(s) => f.write_str(s),
}
}
}
Expand Down Expand Up @@ -961,15 +966,12 @@ impl<'a> Tokenizer<'a> {
Some('>') => {
chars.next();
match chars.peek() {
Some('>') => {
chars.next();
Ok(Some(Token::LongArrow))
}
_ => Ok(Some(Token::Arrow)),
Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow),
_ => self.start_binop(chars, "->", Token::Arrow),
}
}
// a regular '-' operator
_ => Ok(Some(Token::Minus)),
_ => self.start_binop(chars, "-", Token::Minus),
}
}
'/' => {
Expand Down Expand Up @@ -999,26 +1001,28 @@ impl<'a> Tokenizer<'a> {
'%' => {
chars.next(); // advance past '%'
match chars.peek() {
Some(' ') => Ok(Some(Token::Mod)),
Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)),
Some(sch) if self.dialect.is_identifier_start('%') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => Ok(Some(Token::Mod)),
_ => self.start_binop(chars, "%", Token::Mod),
}
}
'|' => {
chars.next(); // consume the '|'
match chars.peek() {
Some('/') => self.consume_and_return(chars, Token::PGSquareRoot),
Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot),
Some('|') => {
chars.next(); // consume the second '|'
match chars.peek() {
Some('/') => self.consume_and_return(chars, Token::PGCubeRoot),
_ => Ok(Some(Token::StringConcat)),
Some('/') => {
self.consume_for_binop(chars, "||/", Token::PGCubeRoot)
}
_ => self.start_binop(chars, "||", Token::StringConcat),
}
}
// Bitshift '|' operator
_ => Ok(Some(Token::Pipe)),
_ => self.start_binop(chars, "|", Token::Pipe),
}
}
'=' => {
Expand Down Expand Up @@ -1061,22 +1065,22 @@ impl<'a> Tokenizer<'a> {
Some('=') => {
chars.next();
match chars.peek() {
Some('>') => self.consume_and_return(chars, Token::Spaceship),
_ => Ok(Some(Token::LtEq)),
Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
_ => self.start_binop(chars, "<=", Token::LtEq),
}
}
Some('>') => self.consume_and_return(chars, Token::Neq),
Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
Some('@') => self.consume_and_return(chars, Token::ArrowAt),
_ => Ok(Some(Token::Lt)),
Some('>') => self.consume_for_binop(chars, "<>", Token::Neq),
Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt),
_ => self.start_binop(chars, "<", Token::Lt),
}
}
'>' => {
chars.next(); // consume
match chars.peek() {
Some('=') => self.consume_and_return(chars, Token::GtEq),
Some('>') => self.consume_and_return(chars, Token::ShiftRight),
_ => Ok(Some(Token::Gt)),
Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq),
Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight),
_ => self.start_binop(chars, ">", Token::Gt),
}
}
':' => {
Expand All @@ -1094,9 +1098,12 @@ impl<'a> Tokenizer<'a> {
'&' => {
chars.next(); // consume the '&'
match chars.peek() {
Some('&') => self.consume_and_return(chars, Token::Overlap),
Some('&') => {
chars.next(); // consume the second '&'
self.start_binop(chars, "&&", Token::Overlap)
}
// Bitshift '&' operator
_ => Ok(Some(Token::Ampersand)),
_ => self.start_binop(chars, "&", Token::Ampersand),
}
}
'^' => {
Expand All @@ -1119,38 +1126,37 @@ impl<'a> Tokenizer<'a> {
'~' => {
chars.next(); // consume
match chars.peek() {
Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk),
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => {
self.consume_and_return(chars, Token::DoubleTildeAsterisk)
self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk)
}
_ => Ok(Some(Token::DoubleTilde)),
_ => self.start_binop(chars, "~~", Token::DoubleTilde),
}
}
_ => Ok(Some(Token::Tilde)),
_ => self.start_binop(chars, "~", Token::Tilde),
}
}
'#' => {
chars.next();
match chars.peek() {
Some('-') => self.consume_and_return(chars, Token::HashMinus),
Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus),
Some('>') => {
chars.next();
match chars.peek() {
Some('>') => {
chars.next();
Ok(Some(Token::HashLongArrow))
self.consume_for_binop(chars, "#>>", Token::HashLongArrow)
}
_ => Ok(Some(Token::HashArrow)),
_ => self.start_binop(chars, "#>", Token::HashArrow),
}
}
Some(' ') => Ok(Some(Token::Sharp)),
Some(sch) if self.dialect.is_identifier_start('#') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => Ok(Some(Token::Sharp)),
_ => self.start_binop(chars, "#", Token::Sharp),
}
}
'@' => {
Expand Down Expand Up @@ -1206,6 +1212,41 @@ impl<'a> Tokenizer<'a> {
}
}

/// Consume the next character, then parse a custom binary operator.
///
/// `prefix` must already include the character being consumed here (i.e.
/// the caller peeked it and spelled it into `prefix` before calling).
/// After consuming, delegates to `start_binop`, which either extends
/// `prefix` into a dialect-specific custom operator token or yields
/// `default` unchanged.
fn consume_for_binop(
    &self,
    chars: &mut State,
    prefix: &str,
    default: Token,
) -> Result<Option<Token>, TokenizerError> {
    chars.next(); // consume the first char
    self.start_binop(chars, prefix, default)
}

/// Parse a custom binary operator beginning with `prefix`.
///
/// Greedily consumes every following character the dialect accepts as part
/// of a custom operator (see `Dialect::is_custom_operator_part`). If at
/// least one such character was consumed, returns
/// `Token::CustomBinaryOperator` holding `prefix` plus those characters;
/// otherwise returns `default` unchanged.
fn start_binop(
    &self,
    chars: &mut State,
    prefix: &str,
    default: Token,
) -> Result<Option<Token>, TokenizerError> {
    // Defer allocating the operator string until we actually see a
    // custom-operator character: for most dialects (and most tokens even
    // in PostgreSQL) the loop never runs, and the original unconditional
    // `prefix.to_string()` allocated on every single operator token.
    let mut custom: Option<String> = None;
    while let Some(&ch) = chars.peek() {
        if !self.dialect.is_custom_operator_part(ch) {
            break;
        }
        custom.get_or_insert_with(|| prefix.to_string()).push(ch);
        chars.next();
    }
    Ok(Some(
        custom.map(Token::CustomBinaryOperator).unwrap_or(default),
    ))
}

/// Tokenize dollar preceded value (i.e: a string/placeholder)
fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
let mut s = String::new();
Expand Down
6 changes: 6 additions & 0 deletions tests/sqlparser_mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,12 @@ fn parse_for_json_expect_ast() {
);
}

#[test]
fn parse_ampersand_arobase() {
    // Tokenizer regression check: `&@` must not be greedily combined into a
    // single custom operator for dialects without custom-operator support.
    // In SQL Server, a&@b means (a) & (@b), in PostgreSQL it means (a) &@ (b)
    ms().expr_parses_to("a&@b", "a & @b");
}

#[test]
fn parse_cast_varchar_max() {
ms_and_generic().verified_expr("CAST('foo' AS VARCHAR(MAX))");
Expand Down
Loading
Loading