diff --git a/src/parser.rs b/src/parser.rs
index 7299a5c5d..731ffe3f7 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4705,6 +4705,99 @@ impl<'a> Parser<'a> {
         Ok(idents)
     }
 
+    /// Parse a multipart identifier of the form `ident1[.identN]*`.
+    ///
+    /// Similar in functionality to [parse_identifiers], with the difference being that
+    /// this function is much stricter about parsing a valid multipart identifier: it
+    /// consumes tokens until the end of input and fails on any extraneous token.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParserError::ParserError`] when the input is empty, when the first token
+    /// is not a word, when a period is not followed by a word (this includes a trailing
+    /// period), or when a word is followed by anything other than a period or the end of
+    /// input.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use sqlparser::ast::Ident;
+    /// use sqlparser::dialect::GenericDialect;
+    /// use sqlparser::parser::Parser;
+    ///
+    /// let dialect = GenericDialect {};
+    /// let expected = vec![Ident::new("one"), Ident::new("two")];
+    ///
+    /// // expected usage
+    /// let sql = "one.two";
+    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
+    /// let actual = parser.parse_multipart_identifier().unwrap();
+    /// assert_eq!(&actual, &expected);
+    ///
+    /// // parse_identifiers is looser about what it allows, so this parses successfully
+    /// let sql = "one + two";
+    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
+    /// let actual = parser.parse_identifiers().unwrap();
+    /// assert_eq!(&actual, &expected);
+    ///
+    /// // expected to strictly fail due to the + separator
+    /// let sql = "one + two";
+    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
+    /// let actual = parser.parse_multipart_identifier().unwrap_err();
+    /// assert_eq!(
+    ///     actual.to_string(),
+    ///     "sql parser error: Unexpected token in identifier: +"
+    /// );
+    /// ```
+    ///
+    /// [parse_identifiers]: Parser::parse_identifiers
+    pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> {
+        let mut idents = vec![];
+
+        // The identifier must start with a word; nothing else is accepted here.
+        match self.next_token().token {
+            Token::Word(w) => idents.push(w.to_ident()),
+            Token::EOF => {
+                return Err(ParserError::ParserError(
+                    "Empty input when parsing identifier".to_string(),
+                ));
+            }
+            token => {
+                return Err(ParserError::ParserError(format!(
+                    "Unexpected token in identifier: {token}"
+                )));
+            }
+        };
+
+        // Parse the optional `.word` parts until the end of input is reached.
+        loop {
+            match self.next_token().token {
+                // A period is only valid when it is followed by another word.
+                Token::Period => match self.next_token().token {
+                    Token::Word(w) => idents.push(w.to_ident()),
+                    Token::EOF => {
+                        return Err(ParserError::ParserError(
+                            "Trailing period in identifier".to_string(),
+                        ));
+                    }
+                    token => {
+                        return Err(ParserError::ParserError(format!(
+                            "Unexpected token following period in identifier: {token}"
+                        )));
+                    }
+                },
+                Token::EOF => break,
+                token => {
+                    return Err(ParserError::ParserError(format!(
+                        "Unexpected token in identifier: {token}"
+                    )));
+                }
+            }
+        }
+
+        Ok(idents)
+    }
+
     /// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
     pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
         let next_token = self.next_token();
@@ -7453,4 +7546,87 @@ mod tests {
             ))
         );
     }
+
+    /// Well-formed multipart identifiers parse into their individual parts.
+    #[test]
+    fn test_parse_multipart_identifier_positive() {
+        let dialect = TestedDialects {
+            dialects: vec![Box::new(GenericDialect {})],
+            options: None,
+        };
+
+        // parse multipart with quotes
+        let expected = vec![
+            Ident {
+                value: "CATALOG".to_string(),
+                quote_style: None,
+            },
+            Ident {
+                value: "F(o)o. \"bar".to_string(),
+                quote_style: Some('"'),
+            },
+            Ident {
+                value: "table".to_string(),
+                quote_style: None,
+            },
+        ];
+        dialect.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| {
+            let actual = parser.parse_multipart_identifier().unwrap();
+            assert_eq!(expected, actual);
+        });
+
+        // allow whitespace between ident parts
+        let expected = vec![
+            Ident {
+                value: "CATALOG".to_string(),
+                quote_style: None,
+            },
+            Ident {
+                value: "table".to_string(),
+                quote_style: None,
+            },
+        ];
+        dialect.run_parser_method("CATALOG . table", |parser| {
+            let actual = parser.parse_multipart_identifier().unwrap();
+            assert_eq!(expected, actual);
+        });
+    }
+
+    /// Malformed multipart identifiers are rejected with a descriptive error message.
+    #[test]
+    fn test_parse_multipart_identifier_negative() {
+        macro_rules! test_parse_multipart_identifier_error {
+            ($input:expr, $expected_err:expr $(,)?) => {{
+                all_dialects().run_parser_method(&*$input, |parser| {
+                    let actual_err = parser.parse_multipart_identifier().unwrap_err();
+                    assert_eq!(actual_err.to_string(), $expected_err);
+                });
+            }};
+        }
+
+        test_parse_multipart_identifier_error!(
+            "",
+            "sql parser error: Empty input when parsing identifier",
+        );
+
+        test_parse_multipart_identifier_error!(
+            "*schema.table",
+            "sql parser error: Unexpected token in identifier: *",
+        );
+
+        test_parse_multipart_identifier_error!(
+            "schema.table*",
+            "sql parser error: Unexpected token in identifier: *",
+        );
+
+        test_parse_multipart_identifier_error!(
+            "schema.table.",
+            "sql parser error: Trailing period in identifier",
+        );
+
+        test_parse_multipart_identifier_error!(
+            "schema.*",
+            "sql parser error: Unexpected token following period in identifier: *",
+        );
+    }
 }