|
38 | 38 | #include "vdbeInt.h"
|
39 | 39 | #include "version.h"
|
40 | 40 | #include "coll/coll.h"
|
| 41 | +#include "tarantoolInt.h" |
41 | 42 | #include <unicode/ustring.h>
|
42 | 43 | #include <unicode/ucasemap.h>
|
43 | 44 | #include <unicode/ucnv.h>
|
@@ -211,61 +212,140 @@ absFunc(sql_context * context, int argc, sql_value ** argv)
|
211 | 212 | }
|
212 | 213 | }
|
213 | 214 |
|
214 |
| -/* |
215 |
| - * Implementation of the instr() function. |
| 215 | +/** |
| 216 | + * Implementation of the position() function. |
216 | 217 | *
|
217 |
| - * instr(haystack,needle) finds the first occurrence of needle |
218 |
| - * in haystack and returns the number of previous characters plus 1, |
219 |
| - * or 0 if needle does not occur within haystack. |
| 218 | + * position(needle, haystack) finds the first occurrence of needle |
| 219 | + * in haystack and returns the number of previous characters |
| 220 | + * plus 1, or 0 if needle does not occur within haystack. |
220 | 221 | *
|
221 |
| - * If both haystack and needle are BLOBs, then the result is one more than |
222 |
| - * the number of bytes in haystack prior to the first occurrence of needle, |
223 |
| - * or 0 if needle never occurs in haystack. |
| 222 | + * If both haystack and needle are BLOBs, then the result is one |
| 223 | + * more than the number of bytes in haystack prior to the first |
| 224 | + * occurrence of needle, or 0 if needle never occurs in haystack. |
224 | 225 | */
|
225 | 226 | static void
|
226 |
| -instrFunc(sql_context * context, int argc, sql_value ** argv) |
| 227 | +position_func(struct sql_context *context, int argc, struct Mem **argv) |
227 | 228 | {
|
228 |
| - const unsigned char *zHaystack; |
229 |
| - const unsigned char *zNeedle; |
230 |
| - int nHaystack; |
231 |
| - int nNeedle; |
232 |
| - int typeHaystack, typeNeedle; |
233 |
| - int N = 1; |
234 |
| - int isText; |
235 |
| - |
236 | 229 | UNUSED_PARAMETER(argc);
|
237 |
| - typeHaystack = sql_value_type(argv[0]); |
238 |
| - typeNeedle = sql_value_type(argv[1]); |
239 |
| - if (typeHaystack == SQL_NULL || typeNeedle == SQL_NULL) |
| 230 | + struct Mem *needle = argv[0]; |
| 231 | + struct Mem *haystack = argv[1]; |
| 232 | + int needle_type = sql_value_type(needle); |
| 233 | + int haystack_type = sql_value_type(haystack); |
| 234 | + |
| 235 | + if (haystack_type == SQL_NULL || needle_type == SQL_NULL) |
| 236 | + return; |
| 237 | + /* |
| 238 | + * The position() function accepts only TEXT or BLOB |
| 239 | + * arguments. |
| 240 | + */ |
| 241 | + struct Mem *inconsistent_type_arg = NULL; |
| 242 | + if (needle_type != SQL_TEXT && needle_type != SQL_BLOB) |
| 243 | + inconsistent_type_arg = needle; |
| 244 | + if (haystack_type != SQL_TEXT && haystack_type != SQL_BLOB) |
| 245 | + inconsistent_type_arg = haystack; |
| 246 | + if (inconsistent_type_arg != NULL) { |
| 247 | + diag_set(ClientError, ER_INCONSISTENT_TYPES, "TEXT or BLOB", |
| 248 | + mem_type_to_str(inconsistent_type_arg)); |
| 249 | + context->isError = SQL_TARANTOOL_ERROR; |
| 250 | + context->fErrorOrAux = 1; |
| 251 | + return; |
| 252 | + } |
| 253 | + /* |
| 254 | + * Both arguments of position() must be of the same |
| 255 | + * type. |
| 256 | + */ |
| 257 | + if (haystack_type != needle_type) { |
| 258 | + diag_set(ClientError, ER_INCONSISTENT_TYPES, |
| 259 | + mem_type_to_str(needle), mem_type_to_str(haystack)); |
| 260 | + context->isError = SQL_TARANTOOL_ERROR; |
| 261 | + context->fErrorOrAux = 1; |
240 | 262 | return;
|
241 |
| - nHaystack = sql_value_bytes(argv[0]); |
242 |
| - nNeedle = sql_value_bytes(argv[1]); |
243 |
| - if (nNeedle > 0) { |
244 |
| - if (typeHaystack == SQL_BLOB && typeNeedle == SQL_BLOB) { |
245 |
| - zHaystack = sql_value_blob(argv[0]); |
246 |
| - zNeedle = sql_value_blob(argv[1]); |
247 |
| - assert(zNeedle != 0); |
248 |
| - assert(zHaystack != 0 || nHaystack == 0); |
249 |
| - isText = 0; |
| 263 | + } |
| 264 | + |
| 265 | + int n_needle_bytes = sql_value_bytes(needle); |
| 266 | + int n_haystack_bytes = sql_value_bytes(haystack); |
| 267 | + int position = 1; |
| 268 | + if (n_needle_bytes > 0) { |
| 269 | + const unsigned char *haystack_str; |
| 270 | + const unsigned char *needle_str; |
| 271 | + if (haystack_type == SQL_BLOB) { |
| 272 | + needle_str = sql_value_blob(needle); |
| 273 | + haystack_str = sql_value_blob(haystack); |
| 274 | + assert(needle_str != NULL); |
| 275 | + assert(haystack_str != NULL || n_haystack_bytes == 0); |
| 276 | + /* |
| 277 | + * Naive substring search: O(n * m) time for an |
| 278 | + * n-byte haystack and an m-byte needle. |
| 279 | + * Can be improved. |
| 280 | + */ |
| 281 | + while (n_needle_bytes <= n_haystack_bytes && |
| 282 | + memcmp(haystack_str, needle_str, n_needle_bytes) != 0) { |
| 283 | + position++; |
| 284 | + n_haystack_bytes--; |
| 285 | + haystack_str++; |
| 286 | + } |
| 287 | + if (n_needle_bytes > n_haystack_bytes) |
| 288 | + position = 0; |
250 | 289 | } else {
|
251 |
| - zHaystack = sql_value_text(argv[0]); |
252 |
| - zNeedle = sql_value_text(argv[1]); |
253 |
| - isText = 1; |
254 |
| - if (zHaystack == 0 || zNeedle == 0) |
255 |
| - return; |
256 |
| - } |
257 |
| - while (nNeedle <= nHaystack |
258 |
| - && memcmp(zHaystack, zNeedle, nNeedle) != 0) { |
259 |
| - N++; |
260 |
| - do { |
261 |
| - nHaystack--; |
262 |
| - zHaystack++; |
263 |
| - } while (isText && (zHaystack[0] & 0xc0) == 0x80); |
| 290 | + /* |
| 291 | + * Code below handles not only simple |
| 292 | + * cases like position('a', 'bca'), but |
| 293 | + * also more complex ones: |
| 294 | + * position('a', 'bcá' COLLATE "unicode_ci") |
| 295 | + * To do so, we slide a comparison window |
| 296 | + * over the haystack. The window always |
| 297 | + * spans as many characters as the needle, |
| 298 | + * so its length in characters is constant |
| 299 | + * while its length in bytes may vary. |
| 300 | + */ |
| 301 | + haystack_str = sql_value_text(haystack); |
| 302 | + needle_str = sql_value_text(needle); |
| 303 | + |
| 304 | + int n_needle_chars = |
| 305 | + sql_utf8_char_count(needle_str, n_needle_bytes); |
| 306 | + int n_haystack_chars = |
| 307 | + sql_utf8_char_count(haystack_str, |
| 308 | + n_haystack_bytes); |
| 309 | + |
| 310 | + if (n_haystack_chars < n_needle_chars) { |
| 311 | + position = 0; |
| 312 | + goto finish; |
| 313 | + } |
| 314 | + /* |
| 315 | + * The comparison window is delimited by |
| 316 | + * beg_offset and end_offset: beg_offset |
| 317 | + * is the byte offset from the start of |
| 318 | + * the haystack to the start of the |
| 319 | + * window, and end_offset is the byte |
| 320 | + * offset to the end of the window. |
| 321 | + */ |
| 322 | + int end_offset = 0; |
| 323 | + for (int c = 0; c < n_needle_chars; c++) { |
| 324 | + SQL_UTF8_FWD_1(haystack_str, end_offset, |
| 325 | + n_haystack_bytes); |
| 326 | + } |
| 327 | + int beg_offset = 0; |
| 328 | + struct coll *coll = sqlGetFuncCollSeq(context); |
| 329 | + int c; |
| 330 | + for (c = 0; c + n_needle_chars <= n_haystack_chars; c++) { |
| 331 | + if (coll->cmp((const char *) haystack_str + beg_offset, |
| 332 | + end_offset - beg_offset, |
| 333 | + (const char *) needle_str, |
| 334 | + n_needle_bytes, coll) == 0) |
| 335 | + goto finish; |
| 336 | + position++; |
| 337 | + /* Update offsets. */ |
| 338 | + SQL_UTF8_FWD_1(haystack_str, beg_offset, |
| 339 | + n_haystack_bytes); |
| 340 | + SQL_UTF8_FWD_1(haystack_str, end_offset, |
| 341 | + n_haystack_bytes); |
| 342 | + } |
| 343 | + /* Needle was not found in the haystack. */ |
| 344 | + position = 0; |
264 | 345 | }
|
265 |
| - if (nNeedle > nHaystack) |
266 |
| - N = 0; |
267 | 346 | }
|
268 |
| - sql_result_int(context, N); |
| 347 | +finish: |
| 348 | + sql_result_int(context, position); |
269 | 349 | }
|
270 | 350 |
|
271 | 351 | /*
|
@@ -1756,7 +1836,7 @@ sqlRegisterBuiltinFunctions(void)
|
1756 | 1836 | FIELD_TYPE_STRING),
|
1757 | 1837 | FUNCTION2(length, 1, 0, 0, lengthFunc, SQL_FUNC_LENGTH,
|
1758 | 1838 | FIELD_TYPE_INTEGER),
|
1759 |
| - FUNCTION(instr, 2, 0, 0, instrFunc, FIELD_TYPE_INTEGER), |
| 1839 | + FUNCTION(position, 2, 0, 1, position_func, FIELD_TYPE_INTEGER), |
1760 | 1840 | FUNCTION(printf, -1, 0, 0, printfFunc, FIELD_TYPE_STRING),
|
1761 | 1841 | FUNCTION(unicode, 1, 0, 0, unicodeFunc, FIELD_TYPE_STRING),
|
1762 | 1842 | FUNCTION(char, -1, 0, 0, charFunc, FIELD_TYPE_STRING),
|
|
0 commit comments