From 1d872fd25bb1880a6c1a95d2aa13482e74b87552 Mon Sep 17 00:00:00 2001
From: lovasoa <pere.jobs@gmail.com>
Date: Thu, 6 Jun 2024 22:45:37 +0200
Subject: [PATCH 1/8] add support for custom operators in postgres

Fixes https://github.com/sqlparser-rs/sqlparser-rs/issues/1298
Fixes https://github.com/lovasoa/SQLpage/issues/372
Closes https://github.com/sqlparser-rs/sqlparser-rs/pull/1299
---
 src/ast/operator.rs         |   2 +-
 src/dialect/mod.rs          |   6 ++
 src/dialect/postgresql.rs   |  27 +++++++++
 src/parser/mod.rs           |   9 +--
 src/tokenizer.rs            | 108 ++++++++++++++++++++++++++++--------
 tests/sqlparser_mssql.rs    |   6 ++
 tests/sqlparser_postgres.rs |  73 +++++++++++++++++++++---
 7 files changed, 195 insertions(+), 36 deletions(-)

diff --git a/src/ast/operator.rs b/src/ast/operator.rs
index 3c4f192e3..e70df344a 100644
--- a/src/ast/operator.rs
+++ b/src/ast/operator.rs
@@ -111,7 +111,7 @@ pub enum BinaryOperator {
     DuckIntegerDivide,
     /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division
     MyIntegerDivide,
-    /// Support for custom operators (built by parsers outside this crate)
+    /// Support for custom operators (such as Postgres custom operators)
     Custom(String),
     /// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific)
     PGBitwiseXor,
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index da5c8c5ac..c79257456 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -122,6 +122,12 @@ pub trait Dialect: Debug + Any {
     fn is_identifier_start(&self, ch: char) -> bool;
     /// Determine if a character is a valid unquoted identifier character
     fn is_identifier_part(&self, ch: char) -> bool;
+
+    /// Determine if a character may be part of a custom operator. Returns `false` unless a dialect overrides it.
+    fn is_custom_operator_part(&self, _ch: char) -> bool {
+        false
+    }
+
     /// Determine if the dialect supports escaping characters via '\' in string literals.
     ///
     /// Some dialects like BigQuery and Snowflake support this while others like
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index f179111e0..8ca64bb48 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -25,6 +25,10 @@ impl Dialect for PostgreSqlDialect {
         Some('"')
     }
 
+    fn is_delimited_identifier_start(&self, ch: char) -> bool {
+        ch == '"' // Only a double quote opens a delimited identifier; Postgres does not support backticks to quote identifiers
+    }
+
     fn is_identifier_start(&self, ch: char) -> bool {
         // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
         // We don't yet support identifiers beginning with "letters with
@@ -36,6 +40,29 @@ impl Dialect for PostgreSqlDialect {
         ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
     }
 
+    /// See https://www.postgresql.org/docs/current/sql-createoperator.html
+    fn is_custom_operator_part(&self, ch: char) -> bool {
+        matches!(
+            ch,
+            '+' | '-'
+                | '*'
+                | '/'
+                | '<'
+                | '>'
+                | '='
+                | '~'
+                | '!'
+                | '@'
+                | '#'
+                | '%'
+                | '^'
+                | '&'
+                | '|'
+                | '`'
+                | '?'
+        )
+    }
+
     fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
         if parser.parse_keyword(Keyword::COMMENT) {
             Some(parse_comment(parser))
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index c6750644c..5cb12a354 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -2300,9 +2300,8 @@ impl<'a> Parser<'a> {
             return infix;
         }
 
-        let tok = self.next_token();
-
-        let regular_binary_operator = match &tok.token {
+        let mut tok = self.next_token();
+        let regular_binary_operator = match &mut tok.token {
             Token::Spaceship => Some(BinaryOperator::Spaceship),
             Token::DoubleEq => Some(BinaryOperator::Eq),
             Token::Eq => Some(BinaryOperator::Eq),
@@ -2366,6 +2365,7 @@ impl<'a> Parser<'a> {
             Token::Question => Some(BinaryOperator::Question),
             Token::QuestionAnd => Some(BinaryOperator::QuestionAnd),
             Token::QuestionPipe => Some(BinaryOperator::QuestionPipe),
+            Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(std::mem::take(s))),
 
             Token::Word(w) => match w.keyword {
                 Keyword::AND => Some(BinaryOperator::And),
@@ -2920,7 +2920,8 @@ impl<'a> Parser<'a> {
             | Token::AtAt
             | Token::Question
             | Token::QuestionAnd
-            | Token::QuestionPipe => Ok(Self::PG_OTHER_PREC),
+            | Token::QuestionPipe
+            | Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC),
             _ => Ok(0),
         }
     }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b6fed354d..f5aca8c35 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -231,6 +231,10 @@ pub enum Token {
     /// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level
     /// keys within the jsonb object
     QuestionPipe,
+    /// Custom binary operator
+    /// This is used to represent any custom binary operator that is not part of the SQL standard.
+    /// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
+    CustomBinaryOperator(String),
 }
 
 impl fmt::Display for Token {
@@ -320,6 +324,7 @@ impl fmt::Display for Token {
             Token::Question => write!(f, "?"),
             Token::QuestionAnd => write!(f, "?&"),
             Token::QuestionPipe => write!(f, "?|"),
+            Token::CustomBinaryOperator(s) => f.write_str(s),
         }
     }
 }
@@ -999,26 +1004,32 @@ impl<'a> Tokenizer<'a> {
                 '%' => {
                     chars.next(); // advance past '%'
                     match chars.peek() {
-                        Some(' ') => Ok(Some(Token::Mod)),
+                        Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)),
                         Some(sch) if self.dialect.is_identifier_start('%') => {
                             self.tokenize_identifier_or_keyword([ch, *sch], chars)
                         }
-                        _ => Ok(Some(Token::Mod)),
+                        _ => self.parse_custom_operator_or(chars, "%", Token::Mod),
                     }
                 }
                 '|' => {
                     chars.next(); // consume the '|'
                     match chars.peek() {
-                        Some('/') => self.consume_and_return(chars, Token::PGSquareRoot),
+                        Some('/') => {
+                            chars.next(); // consume the '/'
+                            self.parse_custom_operator_or(chars, "|/", Token::PGSquareRoot)
+                        }
                         Some('|') => {
                             chars.next(); // consume the second '|'
                             match chars.peek() {
-                                Some('/') => self.consume_and_return(chars, Token::PGCubeRoot),
-                                _ => Ok(Some(Token::StringConcat)),
+                                Some('/') => {
+                                    chars.next();
+                                    self.parse_custom_operator_or(chars, "|", Token::PGCubeRoot)
+                                }
+                                _ => self.parse_custom_operator_or(chars, "|", Token::StringConcat),
                             }
                         }
                         // Bitshift '|' operator
-                        _ => Ok(Some(Token::Pipe)),
+                        _ => self.parse_custom_operator_or(chars, "|", Token::Pipe),
                     }
                 }
                 '=' => {
@@ -1061,14 +1072,26 @@ impl<'a> Tokenizer<'a> {
                         Some('=') => {
                             chars.next();
                             match chars.peek() {
-                                Some('>') => self.consume_and_return(chars, Token::Spaceship),
-                                _ => Ok(Some(Token::LtEq)),
+                                Some('>') => {
+                                    chars.next();
+                                    self.parse_custom_operator_or(chars, "<=>", Token::Spaceship)
+                                }
+                                _ => self.parse_custom_operator_or(chars, "<=", Token::LtEq),
                             }
                         }
-                        Some('>') => self.consume_and_return(chars, Token::Neq),
-                        Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
-                        Some('@') => self.consume_and_return(chars, Token::ArrowAt),
-                        _ => Ok(Some(Token::Lt)),
+                        Some('>') => {
+                            chars.next();
+                            self.parse_custom_operator_or(chars, "<>", Token::Neq)
+                        }
+                        Some('<') => {
+                            chars.next();
+                            self.parse_custom_operator_or(chars, "<<", Token::ShiftLeft)
+                        }
+                        Some('@') => {
+                            chars.next();
+                            self.parse_custom_operator_or(chars, "<@", Token::ArrowAt)
+                        }
+                        _ => self.parse_custom_operator_or(chars, "<", Token::Lt),
                     }
                 }
                 '>' => {
@@ -1094,9 +1117,12 @@ impl<'a> Tokenizer<'a> {
                 '&' => {
                     chars.next(); // consume the '&'
                     match chars.peek() {
-                        Some('&') => self.consume_and_return(chars, Token::Overlap),
+                        Some('&') => {
+                            chars.next(); // consume the second '&'
+                            self.parse_custom_operator_or(chars, "&&", Token::Overlap)
+                        }
                         // Bitshift '&' operator
-                        _ => Ok(Some(Token::Ampersand)),
+                        _ => self.parse_custom_operator_or(chars, "&", Token::Ampersand),
                     }
                 }
                 '^' => {
@@ -1119,38 +1145,53 @@ impl<'a> Tokenizer<'a> {
                 '~' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
+                        Some('*') => {
+                            chars.next();
+                            self.parse_custom_operator_or(chars, "~*", Token::TildeAsterisk)
+                        }
                         Some('~') => {
                             chars.next();
                             match chars.peek() {
                                 Some('*') => {
-                                    self.consume_and_return(chars, Token::DoubleTildeAsterisk)
+                                    chars.next();
+                                    self.parse_custom_operator_or(
+                                        chars,
+                                        "~~*",
+                                        Token::DoubleTildeAsterisk,
+                                    )
                                 }
-                                _ => Ok(Some(Token::DoubleTilde)),
+                                _ => self.parse_custom_operator_or(chars, "~~", Token::DoubleTilde),
                             }
                         }
-                        _ => Ok(Some(Token::Tilde)),
+                        _ => self.parse_custom_operator_or(chars, "~", Token::Tilde),
                     }
                 }
                 '#' => {
                     chars.next();
                     match chars.peek() {
-                        Some('-') => self.consume_and_return(chars, Token::HashMinus),
+                        Some('-') => {
+                            chars.next();
+                            self.parse_custom_operator_or(chars, "#-", Token::HashMinus)
+                        }
                         Some('>') => {
                             chars.next();
                             match chars.peek() {
                                 Some('>') => {
                                     chars.next();
-                                    Ok(Some(Token::HashLongArrow))
+                                    self.parse_custom_operator_or(
+                                        chars,
+                                        "#>>",
+                                        Token::HashLongArrow,
+                                    )
                                 }
-                                _ => Ok(Some(Token::HashArrow)),
+                                _ => self.parse_custom_operator_or(chars, "#>", Token::HashArrow),
                             }
                         }
                         Some(' ') => Ok(Some(Token::Sharp)),
                         Some(sch) if self.dialect.is_identifier_start('#') => {
                             self.tokenize_identifier_or_keyword([ch, *sch], chars)
                         }
-                        _ => Ok(Some(Token::Sharp)),
+                        _ => self.parse_custom_operator_or(chars, "#", Token::Sharp),
                     }
                 }
                 '@' => {
@@ -1206,6 +1247,29 @@ impl<'a> Tokenizer<'a> {
         }
     }
 
+    fn parse_custom_operator_or(
+        &self,
+        chars: &mut State,
+        operator_start: &str,
+        default: Token,
+    ) -> Result<Option<Token>, TokenizerError> {
+        let mut s = operator_start.to_string();
+        let mut is_custom_operator = false;
+        while let Some(&ch) = chars.peek() {
+            if !self.dialect.is_custom_operator_part(ch) {
+                break;
+            }
+            s.push(ch);
+            is_custom_operator = true;
+            chars.next();
+        }
+        if is_custom_operator {
+            Ok(Some(Token::CustomBinaryOperator(s)))
+        } else {
+            Ok(Some(default))
+        }
+    }
+
     /// Tokenize dollar preceded value (i.e: a string/placeholder)
     fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
         let mut s = String::new();
diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs
index 5d61c6ab9..86d3990f6 100644
--- a/tests/sqlparser_mssql.rs
+++ b/tests/sqlparser_mssql.rs
@@ -437,6 +437,12 @@ fn parse_for_json_expect_ast() {
     );
 }
 
+#[test]
+fn parse_ampersand_arobase() {
+    // "arobase" is the at sign. In SQL Server, `a&@b` means `(a) & (@b)`; in PostgreSQL it means `(a) &@ (b)`
+    ms().expr_parses_to("a&@b", "a & @b");
+}
+
 #[test]
 fn parse_cast_varchar_max() {
     ms_and_generic().verified_expr("CAST('foo' AS VARCHAR(MAX))");
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index 677246a51..aee8bdef0 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -1757,6 +1757,29 @@ fn parse_pg_returning() {
     };
 }
 
+fn test_operator(operator: &str, dialect: &TestedDialects, expected: BinaryOperator) {
+    let operator_tokens =
+        sqlparser::tokenizer::Tokenizer::new(&PostgreSqlDialect {}, &format!("a{operator}b"))
+            .tokenize()
+            .unwrap();
+    assert_eq!(
+        operator_tokens.len(),
+        3,
+        "binary op should be 3 tokens, not {operator_tokens:?}"
+    );
+    let expected_expr = Expr::BinaryOp {
+        left: Box::new(Expr::Identifier(Ident::new("a"))),
+        op: expected,
+        right: Box::new(Expr::Identifier(Ident::new("b"))),
+    };
+    let str_expr_canonical = format!("a {operator} b");
+    assert_eq!(expected_expr, dialect.verified_expr(&str_expr_canonical));
+    assert_eq!(
+        expected_expr,
+        dialect.expr_parses_to(&format!("a{operator}b"), &str_expr_canonical)
+    );
+}
+
 #[test]
 fn parse_pg_binary_ops() {
     let binary_ops = &[
@@ -1770,18 +1793,50 @@ fn parse_pg_binary_ops() {
     ];
 
     for (str_op, op, dialects) in binary_ops {
-        let select = dialects.verified_only_select(&format!("SELECT a {} b", &str_op));
-        assert_eq!(
-            SelectItem::UnnamedExpr(Expr::BinaryOp {
-                left: Box::new(Expr::Identifier(Ident::new("a"))),
-                op: op.clone(),
-                right: Box::new(Expr::Identifier(Ident::new("b"))),
-            }),
-            select.projection[0]
-        );
+        test_operator(str_op, dialects, op.clone());
+    }
+}
+
+#[test]
+fn parse_pg_custom_binary_ops() {
+    // Postgres supports declaring custom binary operators, using any character in the following set:
+    //  + - * / < > = ~ ! @ # % ^ & | ` ?
+
+    // Here, we test the ones used by common extensions
+    let operators = [
+        // PostGIS
+        "&&&",   // n-D bounding boxes intersect
+        "&<",    // (is strictly to the left of)
+        "&>",    // (is strictly to the right of)
+        "|=|",   //  distance between A and B trajectories at their closest point of approach
+        "<<#>>", // n-D distance between A and B bounding boxes
+        "|>>",   // A's bounding box is strictly above B's.
+        "~=",    // bounding box is the same
+        // PGroonga
+        "&@",   // Full text search by a keyword
+        "&@~",  // Full text search by easy to use query language
+        "&@*",  // Similar search
+        "&`",   // Advanced search by ECMAScript like query language
+        "&@|",  // Full text search by an array of keywords
+        "&@~|", //  Full text search by an array of queries in easy to use query language
+        // pgtrgm
+        "<<%", // second argument has a continuous extent of an ordered trigram set that matches word boundaries
+        "%>>", // commutator of <<%
+        "<<<->", // distance between arguments
+        // hstore
+        "#=", // Replace fields with matching values from hstore
+    ];
+    for op in &operators {
+        test_operator(op, &pg(), BinaryOperator::Custom(op.to_string()));
     }
 }
 
+#[test]
+fn parse_ampersand_arobase() {
+    // "arobase" is the at sign. In SQL Server, `a&@b` means `(a) & (@b)`; in PostgreSQL it means `(a) &@ (b)`
+    pg().expr_parses_to("a&@b", "a &@ b");
+}
+
 #[test]
 fn parse_pg_unary_ops() {
     let pg_unary_ops = &[

From 28a8c461569f7320523df390c532497fe74f77e9 Mon Sep 17 00:00:00 2001
From: lovasoa <pere.jobs@gmail.com>
Date: Thu, 6 Jun 2024 23:06:40 +0200
Subject: [PATCH 2/8] more concise method names

---
 src/tokenizer.rs | 92 +++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 56 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index f5aca8c35..ce1ebd18d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1008,28 +1008,24 @@ impl<'a> Tokenizer<'a> {
                         Some(sch) if self.dialect.is_identifier_start('%') => {
                             self.tokenize_identifier_or_keyword([ch, *sch], chars)
                         }
-                        _ => self.parse_custom_operator_or(chars, "%", Token::Mod),
+                        _ => self.start_binop(chars, "%", Token::Mod),
                     }
                 }
                 '|' => {
                     chars.next(); // consume the '|'
                     match chars.peek() {
-                        Some('/') => {
-                            chars.next(); // consume the '/'
-                            self.parse_custom_operator_or(chars, "|/", Token::PGSquareRoot)
-                        }
+                        Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot),
                         Some('|') => {
                             chars.next(); // consume the second '|'
                             match chars.peek() {
                                 Some('/') => {
-                                    chars.next();
-                                    self.parse_custom_operator_or(chars, "|", Token::PGCubeRoot)
+                                    self.consume_for_binop(chars, "||/", Token::PGCubeRoot)
                                 }
-                                _ => self.parse_custom_operator_or(chars, "|", Token::StringConcat),
+                                _ => self.start_binop(chars, "||", Token::StringConcat),
                             }
                         }
                         // Bitshift '|' operator
-                        _ => self.parse_custom_operator_or(chars, "|", Token::Pipe),
+                        _ => self.start_binop(chars, "|", Token::Pipe),
                     }
                 }
                 '=' => {
@@ -1072,26 +1068,14 @@ impl<'a> Tokenizer<'a> {
                         Some('=') => {
                             chars.next();
                             match chars.peek() {
-                                Some('>') => {
-                                    chars.next();
-                                    self.parse_custom_operator_or(chars, "<=>", Token::Spaceship)
-                                }
-                                _ => self.parse_custom_operator_or(chars, "<=", Token::LtEq),
+                                Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
+                                _ => self.start_binop(chars, "<=", Token::LtEq),
                             }
                         }
-                        Some('>') => {
-                            chars.next();
-                            self.parse_custom_operator_or(chars, "<>", Token::Neq)
-                        }
-                        Some('<') => {
-                            chars.next();
-                            self.parse_custom_operator_or(chars, "<<", Token::ShiftLeft)
-                        }
-                        Some('@') => {
-                            chars.next();
-                            self.parse_custom_operator_or(chars, "<@", Token::ArrowAt)
-                        }
-                        _ => self.parse_custom_operator_or(chars, "<", Token::Lt),
+                        Some('>') => self.consume_for_binop(chars, "<>", Token::Neq),
+                        Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
+                        Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt),
+                        _ => self.start_binop(chars, "<", Token::Lt),
                     }
                 }
                 '>' => {
@@ -1119,10 +1103,10 @@ impl<'a> Tokenizer<'a> {
                     match chars.peek() {
                         Some('&') => {
                             chars.next(); // consume the second '&'
-                            self.parse_custom_operator_or(chars, "&&", Token::Overlap)
+                            self.start_binop(chars, "&&", Token::Overlap)
                         }
                         // Bitshift '&' operator
-                        _ => self.parse_custom_operator_or(chars, "&", Token::Ampersand),
+                        _ => self.start_binop(chars, "&", Token::Ampersand),
                     }
                 }
                 '^' => {
@@ -1145,53 +1129,37 @@ impl<'a> Tokenizer<'a> {
                 '~' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('*') => {
-                            chars.next();
-                            self.parse_custom_operator_or(chars, "~*", Token::TildeAsterisk)
-                        }
+                        Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk),
                         Some('~') => {
                             chars.next();
                             match chars.peek() {
                                 Some('*') => {
-                                    chars.next();
-                                    self.parse_custom_operator_or(
-                                        chars,
-                                        "~~*",
-                                        Token::DoubleTildeAsterisk,
-                                    )
+                                    self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk)
                                 }
-                                _ => self.parse_custom_operator_or(chars, "~~", Token::DoubleTilde),
+                                _ => self.start_binop(chars, "~~", Token::DoubleTilde),
                             }
                         }
-                        _ => self.parse_custom_operator_or(chars, "~", Token::Tilde),
+                        _ => self.start_binop(chars, "~", Token::Tilde),
                     }
                 }
                 '#' => {
                     chars.next();
                     match chars.peek() {
-                        Some('-') => {
-                            chars.next();
-                            self.parse_custom_operator_or(chars, "#-", Token::HashMinus)
-                        }
+                        Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus),
                         Some('>') => {
                             chars.next();
                             match chars.peek() {
                                 Some('>') => {
-                                    chars.next();
-                                    self.parse_custom_operator_or(
-                                        chars,
-                                        "#>>",
-                                        Token::HashLongArrow,
-                                    )
+                                    self.consume_for_binop(chars, "#>>", Token::HashLongArrow)
                                 }
-                                _ => self.parse_custom_operator_or(chars, "#>", Token::HashArrow),
+                                _ => self.start_binop(chars, "#>", Token::HashArrow),
                             }
                         }
                         Some(' ') => Ok(Some(Token::Sharp)),
                         Some(sch) if self.dialect.is_identifier_start('#') => {
                             self.tokenize_identifier_or_keyword([ch, *sch], chars)
                         }
-                        _ => self.parse_custom_operator_or(chars, "#", Token::Sharp),
+                        _ => self.start_binop(chars, "#", Token::Sharp),
                     }
                 }
                 '@' => {
@@ -1247,13 +1215,25 @@ impl<'a> Tokenizer<'a> {
         }
     }
 
-    fn parse_custom_operator_or(
+    /// Consume the character the caller has just peeked (it must already be the last character of `prefix`), then continue as `start_binop`
+    fn consume_for_binop(
+        &self,
+        chars: &mut State,
+        prefix: &str,
+        default: Token,
+    ) -> Result<Option<Token>, TokenizerError> {
+        chars.next(); // consume the peeked char, which `prefix` already ends with
+        self.start_binop(chars, prefix, default)
+    }
+
+    /// parse a custom binary operator
+    fn start_binop(
         &self,
         chars: &mut State,
-        operator_start: &str,
+        prefix: &str,
         default: Token,
     ) -> Result<Option<Token>, TokenizerError> {
-        let mut s = operator_start.to_string();
+        let mut s = prefix.to_string();
         let mut is_custom_operator = false;
         while let Some(&ch) = chars.peek() {
             if !self.dialect.is_custom_operator_part(ch) {

From 9c9404f9878594ca8e3666c3c7bfee1989a4420c Mon Sep 17 00:00:00 2001
From: lovasoa <pere.jobs@gmail.com>
Date: Thu, 6 Jun 2024 23:08:20 +0200
Subject: [PATCH 3/8] fix for nostd

---
 src/parser/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 5cb12a354..83e2bed39 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -2365,7 +2365,7 @@ impl<'a> Parser<'a> {
             Token::Question => Some(BinaryOperator::Question),
             Token::QuestionAnd => Some(BinaryOperator::QuestionAnd),
             Token::QuestionPipe => Some(BinaryOperator::QuestionPipe),
-            Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(std::mem::take(s))),
+            Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))),
 
             Token::Word(w) => match w.keyword {
                 Keyword::AND => Some(BinaryOperator::And),

From 8e590296b5df0eb35f70c442c354ee707c1d751f Mon Sep 17 00:00:00 2001
From: lovasoa <pere.jobs@gmail.com>
Date: Thu, 6 Jun 2024 23:09:50 +0200
Subject: [PATCH 4/8] fix doc

---
 src/dialect/postgresql.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index 8ca64bb48..0e04bfa27 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -40,7 +40,7 @@ impl Dialect for PostgreSqlDialect {
         ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
     }
 
-    /// See https://www.postgresql.org/docs/current/sql-createoperator.html
+    /// See <https://www.postgresql.org/docs/current/sql-createoperator.html>
     fn is_custom_operator_part(&self, ch: char) -> bool {
         matches!(
             ch,

From b99ee76e4e4141e841271c82802d49c9ee17e317 Mon Sep 17 00:00:00 2001
From: lovasoa <pere.jobs@gmail.com>
Date: Thu, 6 Jun 2024 23:17:30 +0200
Subject: [PATCH 5/8] add support for custom operators starting with a minus
 sign

---
 src/tokenizer.rs            | 9 +++------
 tests/sqlparser_postgres.rs | 2 ++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index ce1ebd18d..82fe9ec14 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -966,15 +966,12 @@ impl<'a> Tokenizer<'a> {
                         Some('>') => {
                             chars.next();
                             match chars.peek() {
-                                Some('>') => {
-                                    chars.next();
-                                    Ok(Some(Token::LongArrow))
-                                }
-                                _ => Ok(Some(Token::Arrow)),
+                                Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow),
+                                _ => self.start_binop(chars, "->", Token::Arrow),
                             }
                         }
                         // a regular '-' operator
-                        _ => Ok(Some(Token::Minus)),
+                        _ => self.start_binop(chars, "-", Token::Minus),
                     }
                 }
                 '/' => {
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index aee8bdef0..76e1831fb 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -1825,6 +1825,8 @@ fn parse_pg_custom_binary_ops() {
         "<<<->", // distance between arguments
         // hstore
         "#=", // Replace fields with matching values from hstore
+        // ranges
+        "-|-", // Is adjacent to
     ];
     for op in &operators {
         test_operator(op, &pg(), BinaryOperator::Custom(op.to_string()));

From d11a24e41cec0e3b79c5288ef791af4f0f73accc Mon Sep 17 00:00:00 2001
From: lovasoa <pere.jobs@gmail.com>
Date: Thu, 6 Jun 2024 23:35:00 +0200
Subject: [PATCH 6/8] add support for operators starting with '>'

---
 src/tokenizer.rs            |  6 +++---
 tests/sqlparser_postgres.rs | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 82fe9ec14..3dd7f8789 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1078,9 +1078,9 @@ impl<'a> Tokenizer<'a> {
                 '>' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('=') => self.consume_and_return(chars, Token::GtEq),
-                        Some('>') => self.consume_and_return(chars, Token::ShiftRight),
-                        _ => Ok(Some(Token::Gt)),
+                        Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq),
+                        Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight),
+                        _ => self.start_binop(chars, ">", Token::Gt),
                     }
                 }
                 ':' => {
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index 76e1831fb..d1218f283 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -1827,6 +1827,27 @@ fn parse_pg_custom_binary_ops() {
         "#=", // Replace fields with matching values from hstore
         // ranges
         "-|-", // Is adjacent to
+        // pg_similarity
+        "~++", // L1 distance
+        "~##", // Cosine Distance
+        "~-~", // Dice Coefficient
+        "~!!", // Euclidean Distance
+        "~@~", // Hamming Distance
+        "~??", // Jaccard Coefficient
+        "~%%", // Jaro Distance
+        "~@@", // Jaro-Winkler Distance
+        "~==", // Levenshtein Distance
+        "~^^", // Matching Coefficient
+        "~||", // Monge-Elkan Coefficient
+        "~#~", // Needleman-Wunsch Coefficient
+        "~**", // Overlap Coefficient
+        "~~~", // Q-Gram Distance
+        "~=~", // Smith-Waterman Coefficient
+        "~!~", // Smith-Waterman-Gotoh Coefficient
+        "~*~", // Soundex Distance
+        // soundex_operator
+        ">@@<", // Soundex matches
+        "<@@>", // Soundex doesn't match
     ];
     for op in &operators {
         test_operator(op, &pg(), BinaryOperator::Custom(op.to_string()));

From 487df8cf3a9f474aeb1a50966a34c445cfebbfc0 Mon Sep 17 00:00:00 2001
From: Ophir LOJKINE <contact@ophir.dev>
Date: Fri, 7 Jun 2024 06:27:53 +0200
Subject: [PATCH 7/8] performance: allocate the operator string only for custom operators

Co-authored-by: Joey Hain <joey@sigmacomputing.com>
---
 src/tokenizer.rs | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 3dd7f8789..43d9dd23a 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1230,21 +1230,17 @@ impl<'a> Tokenizer<'a> {
         prefix: &str,
         default: Token,
     ) -> Result<Option<Token>, TokenizerError> {
-        let mut s = prefix.to_string();
-        let mut is_custom_operator = false;
+        let mut custom = None;
         while let Some(&ch) = chars.peek() {
             if !self.dialect.is_custom_operator_part(ch) {
                 break;
             }
-            s.push(ch);
-            is_custom_operator = true;
+
+            custom.get_or_insert_with(|| prefix.to_string()).push(ch);
             chars.next();
         }
-        if is_custom_operator {
-            Ok(Some(Token::CustomBinaryOperator(s)))
-        } else {
-            Ok(Some(default))
-        }
+
+        Ok(Some(custom.map(Token::CustomBinaryOperator).unwrap_or(default)))
     }
 
     /// Tokenize dollar preceded value (i.e: a string/placeholder)

From d1231c1af2506da21d1f8d07bd87c46d50abfbaf Mon Sep 17 00:00:00 2001
From: lovasoa <pere.jobs@gmail.com>
Date: Fri, 7 Jun 2024 06:31:28 +0200
Subject: [PATCH 8/8] cargo fmt

---
 src/tokenizer.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 43d9dd23a..bcc5478bc 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1240,7 +1240,9 @@ impl<'a> Tokenizer<'a> {
             chars.next();
         }
 
-        Ok(Some(custom.map(Token::CustomBinaryOperator).unwrap_or(default)))
+        Ok(Some(
+            custom.map(Token::CustomBinaryOperator).unwrap_or(default),
+        ))
     }
 
     /// Tokenize dollar preceded value (i.e: a string/placeholder)