From 7064b0ce961c9560e777149a313c465b82144cf8 Mon Sep 17 00:00:00 2001 From: Jefffrey <22608443+Jefffrey@users.noreply.github.com> Date: Sat, 29 Apr 2023 16:58:49 +1000 Subject: [PATCH 1/3] Add parse_multipart_identifier function to parser --- src/parser.rs | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index c468e9be8..c5c855de2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4687,6 +4687,54 @@ impl<'a> Parser<'a> { Ok(idents) } + /// Parse identifiers of form ident1[.identN]* + pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> { + let mut idents = vec![]; + + // expecting at least one word for identifier + match self.next_token().token { + Token::Word(w) => idents.push(w.to_ident()), + Token::EOF => { + return Err(ParserError::ParserError( + "Empty input when parsing identifier".to_string(), + ))? + } + token => { + return Err(ParserError::ParserError(format!( + "Unexpected token in identifier: {token}" + )))? + } + }; + + // parse optional next parts if exist + loop { + match self.next_token().token { + // ensure that optional period is succeeded by another identifier + Token::Period => match self.next_token().token { + Token::Word(w) => idents.push(w.to_ident()), + Token::EOF => { + return Err(ParserError::ParserError( + "Trailing period in identifier".to_string(), + ))? + } + token => { + return Err(ParserError::ParserError(format!( + "Unexpected token following period in identifier: {token}" + )))? + } + }, + Token::EOF => break, + token => { + return Err(ParserError::ParserError(format!( + "Unexpected token in identifier: {token}" + )))?
+ } + } + } + + Ok(idents) + } + /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> { let next_token = self.next_token(); @@ -7429,4 +7477,84 @@ mod tests { )) ); } + + #[test] + fn test_parse_multipart_identifier_positive() { + let dialect = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + }; + + // parse multipart with quotes + let expected = vec![ + Ident { + value: "CATALOG".to_string(), + quote_style: None, + }, + Ident { + value: "F(o)o. \"bar".to_string(), + quote_style: Some('"'), + }, + Ident { + value: "table".to_string(), + quote_style: None, + }, + ]; + dialect.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| { + let actual = parser.parse_multipart_identifier().unwrap(); + assert_eq!(expected, actual); + }); + + // allow whitespace between ident parts + let expected = vec![ + Ident { + value: "CATALOG".to_string(), + quote_style: None, + }, + Ident { + value: "table".to_string(), + quote_style: None, + }, + ]; + dialect.run_parser_method("CATALOG . table", |parser| { + let actual = parser.parse_multipart_identifier().unwrap(); + assert_eq!(expected, actual); + }); + } + + #[test] + fn test_parse_multipart_identifier_negative() { + macro_rules! test_parse_multipart_identifier_error { + ($input:expr, $expected_err:expr $(,)?)
=> {{ + all_dialects().run_parser_method(&*$input, |parser| { + let actual_err = parser.parse_multipart_identifier().unwrap_err(); + assert_eq!(actual_err.to_string(), $expected_err); + }); + }}; + } + + test_parse_multipart_identifier_error!( + "", + "sql parser error: Empty input when parsing identifier", + ); + + test_parse_multipart_identifier_error!( + "*schema.table", + "sql parser error: Unexpected token in identifier: *", + ); + + test_parse_multipart_identifier_error!( + "schema.table*", + "sql parser error: Unexpected token in identifier: *", + ); + + test_parse_multipart_identifier_error!( + "schema.table.", + "sql parser error: Trailing period in identifier", + ); + + test_parse_multipart_identifier_error!( + "schema.*", + "sql parser error: Unexpected token following period in identifier: *", + ); + } } From 95bd30f37f09f68844090a5ae22f228f544a7726 Mon Sep 17 00:00:00 2001 From: Jefffrey <22608443+Jefffrey@users.noreply.github.com> Date: Sun, 14 May 2023 11:15:08 +1000 Subject: [PATCH 2/3] Update doc for parse_multipart_identifier --- src/parser.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index c5c855de2..e3922621f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4688,6 +4688,44 @@ impl<'a> Parser<'a> { } /// Parse identifiers of form ident1[.identN]* + /// + /// Similar in functionality to [parse_identifiers], with difference + /// being this function is much more strict about parsing a valid multipart identifier, not + /// allowing extraneous tokens to be parsed, otherwise it fails. 
+ /// + /// For example: + /// + /// ```rust + /// use sqlparser::ast::Ident; + /// use sqlparser::dialect::GenericDialect; + /// use sqlparser::parser::Parser; + /// + /// let dialect = GenericDialect {}; + /// let expected = vec![Ident::new("one"), Ident::new("two")]; + /// + /// // expected usage + /// let sql = "one.two"; + /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); + /// let actual = parser.parse_multipart_identifier().unwrap(); + /// assert_eq!(&actual, &expected); + /// + /// // parse_identifiers is more loose on what it allows, parsing successfully + /// let sql = "one + two"; + /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); + /// let actual = parser.parse_identifiers().unwrap(); + /// assert_eq!(&actual, &expected); + /// + /// // expected to strictly fail due to + separator + /// let sql = "one + two"; + /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); + /// let actual = parser.parse_multipart_identifier().unwrap_err(); + /// assert_eq!( + /// actual.to_string(), + /// "sql parser error: Unexpected token in identifier: +" + /// ); + /// ``` + /// + /// [parse_identifiers]: Parser::parse_identifiers pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> { let mut idents = vec![]; From eb2cc8b621fdf192e69fba9e68101ddd9281299c Mon Sep 17 00:00:00 2001 From: Jefffrey <22608443+Jefffrey@users.noreply.github.com> Date: Sun, 14 May 2023 11:20:14 +1000 Subject: [PATCH 3/3] Fix conflict --- src/parser.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.rs b/src/parser.rs index 496e99654..731ffe3f7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7544,6 +7544,7 @@ mod tests { fn test_parse_multipart_identifier_positive() { let dialect = TestedDialects { dialects: vec![Box::new(GenericDialect {})], + options: None, }; // parse multipart with quotes