@@ -231,6 +231,10 @@ pub enum Token {
231
231
/// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level
232
232
/// keys within the jsonb object
233
233
QuestionPipe ,
234
+ /// Custom binary operator
235
+ /// This is used to represent any custom binary operator that is not part of the SQL standard.
236
+ /// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
237
+ CustomBinaryOperator ( String ) ,
234
238
}
235
239
236
240
impl fmt:: Display for Token {
@@ -320,6 +324,7 @@ impl fmt::Display for Token {
320
324
Token :: Question => write ! ( f, "?" ) ,
321
325
Token :: QuestionAnd => write ! ( f, "?&" ) ,
322
326
Token :: QuestionPipe => write ! ( f, "?|" ) ,
327
+ Token :: CustomBinaryOperator ( s) => f. write_str ( s) ,
323
328
}
324
329
}
325
330
}
@@ -961,15 +966,12 @@ impl<'a> Tokenizer<'a> {
961
966
Some ( '>' ) => {
962
967
chars. next ( ) ;
963
968
match chars. peek ( ) {
964
- Some ( '>' ) => {
965
- chars. next ( ) ;
966
- Ok ( Some ( Token :: LongArrow ) )
967
- }
968
- _ => Ok ( Some ( Token :: Arrow ) ) ,
969
+ Some ( '>' ) => self . consume_for_binop ( chars, "->>" , Token :: LongArrow ) ,
970
+ _ => self . start_binop ( chars, "->" , Token :: Arrow ) ,
969
971
}
970
972
}
971
973
// a regular '-' operator
972
- _ => Ok ( Some ( Token :: Minus ) ) ,
974
+ _ => self . start_binop ( chars , "-" , Token :: Minus ) ,
973
975
}
974
976
}
975
977
'/' => {
@@ -999,26 +1001,28 @@ impl<'a> Tokenizer<'a> {
999
1001
'%' => {
1000
1002
chars. next ( ) ; // advance past '%'
1001
1003
match chars. peek ( ) {
1002
- Some ( ' ' ) => Ok ( Some ( Token :: Mod ) ) ,
1004
+ Some ( s ) if s . is_whitespace ( ) => Ok ( Some ( Token :: Mod ) ) ,
1003
1005
Some ( sch) if self . dialect . is_identifier_start ( '%' ) => {
1004
1006
self . tokenize_identifier_or_keyword ( [ ch, * sch] , chars)
1005
1007
}
1006
- _ => Ok ( Some ( Token :: Mod ) ) ,
1008
+ _ => self . start_binop ( chars , "%" , Token :: Mod ) ,
1007
1009
}
1008
1010
}
1009
1011
'|' => {
1010
1012
chars. next ( ) ; // consume the '|'
1011
1013
match chars. peek ( ) {
1012
- Some ( '/' ) => self . consume_and_return ( chars, Token :: PGSquareRoot ) ,
1014
+ Some ( '/' ) => self . consume_for_binop ( chars, "|/" , Token :: PGSquareRoot ) ,
1013
1015
Some ( '|' ) => {
1014
1016
chars. next ( ) ; // consume the second '|'
1015
1017
match chars. peek ( ) {
1016
- Some ( '/' ) => self . consume_and_return ( chars, Token :: PGCubeRoot ) ,
1017
- _ => Ok ( Some ( Token :: StringConcat ) ) ,
1018
+ Some ( '/' ) => {
1019
+ self . consume_for_binop ( chars, "||/" , Token :: PGCubeRoot )
1020
+ }
1021
+ _ => self . start_binop ( chars, "||" , Token :: StringConcat ) ,
1018
1022
}
1019
1023
}
1020
1024
// Bitwise OR '|' operator
1021
- _ => Ok ( Some ( Token :: Pipe ) ) ,
1025
+ _ => self . start_binop ( chars , "|" , Token :: Pipe ) ,
1022
1026
}
1023
1027
}
1024
1028
'=' => {
@@ -1061,22 +1065,22 @@ impl<'a> Tokenizer<'a> {
1061
1065
Some ( '=' ) => {
1062
1066
chars. next ( ) ;
1063
1067
match chars. peek ( ) {
1064
- Some ( '>' ) => self . consume_and_return ( chars, Token :: Spaceship ) ,
1065
- _ => Ok ( Some ( Token :: LtEq ) ) ,
1068
+ Some ( '>' ) => self . consume_for_binop ( chars, "<=>" , Token :: Spaceship ) ,
1069
+ _ => self . start_binop ( chars , "<=" , Token :: LtEq ) ,
1066
1070
}
1067
1071
}
1068
- Some ( '>' ) => self . consume_and_return ( chars, Token :: Neq ) ,
1069
- Some ( '<' ) => self . consume_and_return ( chars, Token :: ShiftLeft ) ,
1070
- Some ( '@' ) => self . consume_and_return ( chars, Token :: ArrowAt ) ,
1071
- _ => Ok ( Some ( Token :: Lt ) ) ,
1072
+ Some ( '>' ) => self . consume_for_binop ( chars, "<>" , Token :: Neq ) ,
1073
+ Some ( '<' ) => self . consume_for_binop ( chars, "<<" , Token :: ShiftLeft ) ,
1074
+ Some ( '@' ) => self . consume_for_binop ( chars, "<@" , Token :: ArrowAt ) ,
1075
+ _ => self . start_binop ( chars , "<" , Token :: Lt ) ,
1072
1076
}
1073
1077
}
1074
1078
'>' => {
1075
1079
chars. next ( ) ; // consume
1076
1080
match chars. peek ( ) {
1077
- Some ( '=' ) => self . consume_and_return ( chars, Token :: GtEq ) ,
1078
- Some ( '>' ) => self . consume_and_return ( chars, Token :: ShiftRight ) ,
1079
- _ => Ok ( Some ( Token :: Gt ) ) ,
1081
+ Some ( '=' ) => self . consume_for_binop ( chars, ">=" , Token :: GtEq ) ,
1082
+ Some ( '>' ) => self . consume_for_binop ( chars, ">>" , Token :: ShiftRight ) ,
1083
+ _ => self . start_binop ( chars , ">" , Token :: Gt ) ,
1080
1084
}
1081
1085
}
1082
1086
':' => {
@@ -1094,9 +1098,12 @@ impl<'a> Tokenizer<'a> {
1094
1098
'&' => {
1095
1099
chars. next ( ) ; // consume the '&'
1096
1100
match chars. peek ( ) {
1097
- Some ( '&' ) => self . consume_and_return ( chars, Token :: Overlap ) ,
1101
+ Some ( '&' ) => {
1102
+ chars. next ( ) ; // consume the second '&'
1103
+ self . start_binop ( chars, "&&" , Token :: Overlap )
1104
+ }
1098
1105
// Bitwise AND '&' operator
1099
- _ => Ok ( Some ( Token :: Ampersand ) ) ,
1106
+ _ => self . start_binop ( chars , "&" , Token :: Ampersand ) ,
1100
1107
}
1101
1108
}
1102
1109
'^' => {
@@ -1119,38 +1126,37 @@ impl<'a> Tokenizer<'a> {
1119
1126
'~' => {
1120
1127
chars. next ( ) ; // consume
1121
1128
match chars. peek ( ) {
1122
- Some ( '*' ) => self . consume_and_return ( chars, Token :: TildeAsterisk ) ,
1129
+ Some ( '*' ) => self . consume_for_binop ( chars, "~*" , Token :: TildeAsterisk ) ,
1123
1130
Some ( '~' ) => {
1124
1131
chars. next ( ) ;
1125
1132
match chars. peek ( ) {
1126
1133
Some ( '*' ) => {
1127
- self . consume_and_return ( chars, Token :: DoubleTildeAsterisk )
1134
+ self . consume_for_binop ( chars, "~~*" , Token :: DoubleTildeAsterisk )
1128
1135
}
1129
- _ => Ok ( Some ( Token :: DoubleTilde ) ) ,
1136
+ _ => self . start_binop ( chars , "~~" , Token :: DoubleTilde ) ,
1130
1137
}
1131
1138
}
1132
- _ => Ok ( Some ( Token :: Tilde ) ) ,
1139
+ _ => self . start_binop ( chars , "~" , Token :: Tilde ) ,
1133
1140
}
1134
1141
}
1135
1142
'#' => {
1136
1143
chars. next ( ) ;
1137
1144
match chars. peek ( ) {
1138
- Some ( '-' ) => self . consume_and_return ( chars, Token :: HashMinus ) ,
1145
+ Some ( '-' ) => self . consume_for_binop ( chars, "#-" , Token :: HashMinus ) ,
1139
1146
Some ( '>' ) => {
1140
1147
chars. next ( ) ;
1141
1148
match chars. peek ( ) {
1142
1149
Some ( '>' ) => {
1143
- chars. next ( ) ;
1144
- Ok ( Some ( Token :: HashLongArrow ) )
1150
+ self . consume_for_binop ( chars, "#>>" , Token :: HashLongArrow )
1145
1151
}
1146
- _ => Ok ( Some ( Token :: HashArrow ) ) ,
1152
+ _ => self . start_binop ( chars , "#>" , Token :: HashArrow ) ,
1147
1153
}
1148
1154
}
1149
1155
Some ( ' ' ) => Ok ( Some ( Token :: Sharp ) ) ,
1150
1156
Some ( sch) if self . dialect . is_identifier_start ( '#' ) => {
1151
1157
self . tokenize_identifier_or_keyword ( [ ch, * sch] , chars)
1152
1158
}
1153
- _ => Ok ( Some ( Token :: Sharp ) ) ,
1159
+ _ => self . start_binop ( chars , "#" , Token :: Sharp ) ,
1154
1160
}
1155
1161
}
1156
1162
'@' => {
@@ -1206,6 +1212,39 @@ impl<'a> Tokenizer<'a> {
1206
1212
}
1207
1213
}
1208
1214
1215
+ /// Consume the next character, then parse a custom binary operator. The next character should be included in the prefix
1216
+ fn consume_for_binop (
1217
+ & self ,
1218
+ chars : & mut State ,
1219
+ prefix : & str ,
1220
+ default : Token ,
1221
+ ) -> Result < Option < Token > , TokenizerError > {
1222
+ chars. next ( ) ; // consume the first char
1223
+ self . start_binop ( chars, prefix, default)
1224
+ }
1225
+
1226
+ /// parse a custom binary operator
1227
+ fn start_binop (
1228
+ & self ,
1229
+ chars : & mut State ,
1230
+ prefix : & str ,
1231
+ default : Token ,
1232
+ ) -> Result < Option < Token > , TokenizerError > {
1233
+ let mut custom = None ;
1234
+ while let Some ( & ch) = chars. peek ( ) {
1235
+ if !self . dialect . is_custom_operator_part ( ch) {
1236
+ break ;
1237
+ }
1238
+
1239
+ custom. get_or_insert_with ( || prefix. to_string ( ) ) . push ( ch) ;
1240
+ chars. next ( ) ;
1241
+ }
1242
+
1243
+ Ok ( Some (
1244
+ custom. map ( Token :: CustomBinaryOperator ) . unwrap_or ( default) ,
1245
+ ) )
1246
+ }
1247
+
1209
1248
/// Tokenize dollar preceded value (i.e: a string/placeholder)
1210
1249
fn tokenize_dollar_preceded_value ( & self , chars : & mut State ) -> Result < Token , TokenizerError > {
1211
1250
let mut s = String :: new ( ) ;
0 commit comments