From 9728307ae34ff275eb7abe4d9dc1ab4307fa6ce0 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 May 2023 10:34:49 -0700 Subject: [PATCH 1/8] Add REGEXP infix operator for MySQL --- src/ast/operator.rs | 3 +++ src/dialect/mysql.rs | 24 +++++++++++++++++++++++- src/keywords.rs | 1 + tests/sqlparser_mysql.rs | 6 ++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 75877c949..2c7a2f824 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -85,6 +85,8 @@ pub enum BinaryOperator { BitwiseOr, BitwiseAnd, BitwiseXor, + // MySQL & Sqlite use `REGEXP` as an infix operator + Regexp, PGBitwiseXor, PGBitwiseShiftLeft, PGBitwiseShiftRight, @@ -122,6 +124,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::BitwiseOr => f.write_str("|"), BinaryOperator::BitwiseAnd => f.write_str("&"), BinaryOperator::BitwiseXor => f.write_str("^"), + BinaryOperator::Regexp => f.write_str("REGEXP"), BinaryOperator::PGBitwiseXor => f.write_str("#"), BinaryOperator::PGBitwiseShiftLeft => f.write_str("<<"), BinaryOperator::PGBitwiseShiftRight => f.write_str(">>"), diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index ceab34810..459418d43 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -10,7 +10,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::dialect::Dialect; +use crate::{ + ast::{BinaryOperator, Expr}, + dialect::Dialect, + keywords::Keyword, +}; /// [MySQL](https://www.mysql.com/) #[derive(Debug)] @@ -35,4 +39,22 @@ impl Dialect for MySqlDialect { fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == '`' } + + fn parse_infix( + &self, + _parser: &mut crate::parser::Parser, + _expr: &crate::ast::Expr, + _precedence: u8, + ) -> Option> { + // Parse REGEXP as an operator + if _parser.parse_keyword(Keyword::REGEXP) { + Some(Ok(Expr::BinaryOp { + left: Box::new(_expr.clone()), + op: BinaryOperator::Regexp, + right: Box::new(_parser.parse_expr().unwrap()), + })) + } else { + None + } + } } diff --git a/src/keywords.rs b/src/keywords.rs index a0c5b68cb..c4a21dec9 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -473,6 +473,7 @@ define_keywords!( REFERENCES, REFERENCING, REGCLASS, + REGEXP, REGR_AVGX, REGR_AVGY, REGR_COUNT, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 1c479bb18..50e2a081b 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1407,3 +1407,9 @@ fn parse_string_introducers() { mysql().one_statement_parses_to("SELECT _utf8mb4'abc'", "SELECT _utf8mb4 'abc'"); mysql().verified_stmt("SELECT _binary 'abc', _utf8mb4 'abc'"); } + +#[test] +fn parse_regexp_infix() { + // TODO: what's the correct test here? (Is there a test which snapshots the AST?) + mysql().verified_stmt(r#"SELECT "foobar" REGEXP "^foo""#); +} From a149c8acb8e3e7b5a62f78d5af2b1abce03fc005 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 May 2023 18:10:32 -0700 Subject: [PATCH 2/8] use Box from alloc --- src/dialect/mysql.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 459418d43..9e82702f3 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -10,6 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#[cfg(not(feature = "std"))] +use alloc::boxed::Box; + use crate::{ ast::{BinaryOperator, Expr}, dialect::Dialect, From c16dd2f91fd055d3439f276df6b4d46e62cdd2a3 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 May 2023 19:08:20 -0700 Subject: [PATCH 3/8] Confirm it's not a quote issue --- tests/sqlparser_mysql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 50e2a081b..3fa51c4d4 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1411,5 +1411,5 @@ fn parse_string_introducers() { #[test] fn parse_regexp_infix() { // TODO: what's the correct test here? (Is there a test which snapshots the AST?) - mysql().verified_stmt(r#"SELECT "foobar" REGEXP "^foo""#); + mysql().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*';"#); } From 0492d8f08aa1bc464f2897f4b7faf02b7c0afa72 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 May 2023 19:41:46 -0700 Subject: [PATCH 4/8] --- src/parser.rs | 3 +++ tests/sqlparser_mysql.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 82cbe9d12..7c84b0d6d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1980,6 +1980,8 @@ impl<'a> Parser<'a> { const AND_PREC: u8 = 10; const OR_PREC: u8 = 5; + const REGEXP_OP_PREC: u8 = 17; + /// Get the precedence of the next token pub fn get_next_precedence(&self) -> Result { // allow the dialect to override precedence logic @@ -2029,6 +2031,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::REGEXP_OP_PREC), Token::Eq | Token::Lt | Token::LtEq diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3fa51c4d4..c3165e56f 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1411,5 +1411,5 @@ fn parse_string_introducers() { #[test] fn parse_regexp_infix() { // TODO: what's the correct test here? (Is there a test which snapshots the AST?) - mysql().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*';"#); + mysql().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*'"#); } From 9938cedde47f8a5daebf422160ccd040a9babbd1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 May 2023 19:41:58 -0700 Subject: [PATCH 5/8] --- tests/sqlparser_mysql.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c3165e56f..636db7954 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1410,6 +1410,5 @@ fn parse_string_introducers() { #[test] fn parse_regexp_infix() { - // TODO: what's the correct test here? (Is there a test which snapshots the AST?) mysql().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*'"#); } From 41ed48df619fbd51421d7a0121e9d17d1ff338e6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 May 2023 19:51:27 -0700 Subject: [PATCH 6/8] Add to sqlite too (should it be abstracted in some way?) --- src/dialect/sqlite.rs | 23 ++++++++++++++++++++++- tests/sqlparser_sqlite.rs | 5 +++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index fa21224f6..b440bdb09 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -10,7 +10,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::ast::Statement; +#[cfg(not(feature = "std"))] +use alloc::boxed::Box; + +use crate::ast::{BinaryOperator, Expr, Statement}; use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; @@ -47,4 +50,22 @@ impl Dialect for SQLiteDialect { None } } + + fn parse_infix( + &self, + _parser: &mut crate::parser::Parser, + _expr: &crate::ast::Expr, + _precedence: u8, + ) -> Option> { + // Parse REGEXP as an operator + if _parser.parse_keyword(Keyword::REGEXP) { + Some(Ok(Expr::BinaryOp { + left: Box::new(_expr.clone()), + op: BinaryOperator::Regexp, + right: Box::new(_parser.parse_expr().unwrap()), + })) + } else { + None + } + } } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 31d2dd97f..fbd8ba592 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -256,3 +256,8 @@ fn sqlite_and_generic() -> TestedDialects { options: None, } } + +#[test] +fn parse_regexp_infix() { + mysql().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*'"#); +} From be806b74cb5cbdcc6a959d312eebdf3eb2c2ca7c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 May 2023 20:03:51 -0700 Subject: [PATCH 7/8] --- tests/sqlparser_sqlite.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index fbd8ba592..74dd01d7f 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -259,5 +259,5 @@ fn sqlite_and_generic() -> TestedDialects { #[test] fn parse_regexp_infix() { - mysql().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*'"#); + sqlite().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*'"#); } From 12631998a6fb66c434983b65d72fffee09a0f0ea Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 10 May 2023 11:08:07 -0700 Subject: [PATCH 8/8] --- src/parser.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 7c84b0d6d..8e5946f03 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1980,8 +1980,6 @@ impl<'a> Parser<'a> { const AND_PREC: u8 = 10; const OR_PREC: u8 = 5; - const REGEXP_OP_PREC: u8 = 17; - /// Get the precedence of the next token pub fn get_next_precedence(&self) -> Result { // allow the dialect to override precedence logic @@ -2031,7 +2029,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::REGEXP_OP_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Eq | Token::Lt | Token::LtEq