|
19 | 19 | #include <ctype.h>
|
20 | 20 | #include <sys/types.h>
|
21 | 21 |
|
22 |
| -#ifdef __SSE2__ |
23 |
| -#include <emmintrin.h> |
24 |
| -#endif |
25 |
| - |
26 | 22 | #include "php.h"
|
27 | 23 |
|
28 | 24 | #include "url.h"
|
29 | 25 | #include "file.h"
|
| 26 | +#include "zend_simd.h" |
30 | 27 |
|
31 | 28 | /* {{{ free_url */
|
32 | 29 | PHPAPI void php_url_free(php_url *theurl)
|
@@ -457,53 +454,53 @@ static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t
|
457 | 454 | start = zend_string_safe_alloc(3, len, 0, 0);
|
458 | 455 | to = (unsigned char*)ZSTR_VAL(start);
|
459 | 456 |
|
460 |
| -#ifdef __SSE2__ |
| 457 | +#ifdef ZEND_HAVE_SIMD |
461 | 458 | while (from + 16 < end) {
|
462 |
| - __m128i mask; |
| 459 | + zend_vec_8x16_t mask; |
463 | 460 | uint32_t bits;
|
464 |
| - const __m128i _A = _mm_set1_epi8('A' - 1); |
465 |
| - const __m128i Z_ = _mm_set1_epi8('Z' + 1); |
466 |
| - const __m128i _a = _mm_set1_epi8('a' - 1); |
467 |
| - const __m128i z_ = _mm_set1_epi8('z' + 1); |
468 |
| - const __m128i _zero = _mm_set1_epi8('0' - 1); |
469 |
| - const __m128i nine_ = _mm_set1_epi8('9' + 1); |
470 |
| - const __m128i dot = _mm_set1_epi8('.'); |
471 |
| - const __m128i minus = _mm_set1_epi8('-'); |
472 |
| - const __m128i under = _mm_set1_epi8('_'); |
473 |
| - |
474 |
| - __m128i in = _mm_loadu_si128((__m128i *)from); |
475 |
| - |
476 |
| - __m128i gt = _mm_cmpgt_epi8(in, _A); |
477 |
| - __m128i lt = _mm_cmplt_epi8(in, Z_); |
478 |
| - mask = _mm_and_si128(lt, gt); /* upper */ |
479 |
| - gt = _mm_cmpgt_epi8(in, _a); |
480 |
| - lt = _mm_cmplt_epi8(in, z_); |
481 |
| - mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* lower */ |
482 |
| - gt = _mm_cmpgt_epi8(in, _zero); |
483 |
| - lt = _mm_cmplt_epi8(in, nine_); |
484 |
| - mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* number */ |
485 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, dot)); |
486 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, minus)); |
487 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, under)); |
| 461 | + const zend_vec_8x16_t _A = zend_vec_set_8x16('A' - 1); |
| 462 | + const zend_vec_8x16_t Z_ = zend_vec_set_8x16('Z' + 1); |
| 463 | + const zend_vec_8x16_t _a = zend_vec_set_8x16('a' - 1); |
| 464 | + const zend_vec_8x16_t z_ = zend_vec_set_8x16('z' + 1); |
| 465 | + const zend_vec_8x16_t _zero = zend_vec_set_8x16('0' - 1); |
| 466 | + const zend_vec_8x16_t nine_ = zend_vec_set_8x16('9' + 1); |
| 467 | + const zend_vec_8x16_t dot = zend_vec_set_8x16('.'); |
| 468 | + const zend_vec_8x16_t minus = zend_vec_set_8x16('-'); |
| 469 | + const zend_vec_8x16_t under = zend_vec_set_8x16('_'); |
| 470 | + |
| 471 | + zend_vec_8x16_t in = zend_vec_loadu_8x16((zend_vec_8x16_t *)from); |
| 472 | + |
| 473 | + zend_vec_8x16_t gt = zend_vec_cmpgt_8x16(in, _A); |
| 474 | + zend_vec_8x16_t lt = zend_vec_cmplt_8x16(in, Z_); |
| 475 | + mask = zend_vec_and_8x16(lt, gt); /* upper */ |
| 476 | + gt = zend_vec_cmpgt_8x16(in, _a); |
| 477 | + lt = zend_vec_cmplt_8x16(in, z_); |
| 478 | + mask = zend_vec_or_8x16(mask, zend_vec_and_8x16(lt, gt)); /* lower */ |
| 479 | + gt = zend_vec_cmpgt_8x16(in, _zero); |
| 480 | + lt = zend_vec_cmplt_8x16(in, nine_); |
| 481 | + mask = zend_vec_or_8x16(mask, zend_vec_and_8x16(lt, gt)); /* number */ |
| 482 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, dot)); |
| 483 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, minus)); |
| 484 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, under)); |
488 | 485 |
|
489 | 486 | if (!raw) {
|
490 |
| - const __m128i blank = _mm_set1_epi8(' '); |
491 |
| - __m128i eq = _mm_cmpeq_epi8(in, blank); |
492 |
| - if (_mm_movemask_epi8(eq)) { |
493 |
| - in = _mm_add_epi8(in, _mm_and_si128(eq, _mm_set1_epi8('+' - ' '))); |
494 |
| - mask = _mm_or_si128(mask, eq); |
| 487 | + const zend_vec_8x16_t blank = zend_vec_set_8x16(' '); |
| 488 | + zend_vec_8x16_t eq = zend_vec_cmpeq_8x16(in, blank); |
| 489 | + if (zend_vec_movemask_8x16(eq)) { |
| 490 | + in = zend_vec_add_8x16(in, zend_vec_and_8x16(eq, zend_vec_set_8x16('+' - ' '))); |
| 491 | + mask = zend_vec_or_8x16(mask, eq); |
495 | 492 | }
|
496 | 493 | }
|
497 | 494 | if (raw) {
|
498 |
| - const __m128i wavy = _mm_set1_epi8('~'); |
499 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, wavy)); |
| 495 | + const zend_vec_8x16_t wavy = zend_vec_set_8x16('~'); |
| 496 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, wavy)); |
500 | 497 | }
|
501 |
| - if (((bits = _mm_movemask_epi8(mask)) & 0xffff) == 0xffff) { |
502 |
| - _mm_storeu_si128((__m128i*)to, in); |
| 498 | + if (((bits = zend_vec_movemask_8x16(mask)) & 0xffff) == 0xffff) { |
| 499 | + zend_vec_storeu_8x16((zend_vec_8x16_t*)to, in); |
503 | 500 | to += 16;
|
504 | 501 | } else {
|
505 | 502 | unsigned char xmm[16];
|
506 |
| - _mm_storeu_si128((__m128i*)xmm, in); |
| 503 | + zend_vec_storeu_8x16((zend_vec_8x16_t*)xmm, in); |
507 | 504 | for (size_t i = 0; i < sizeof(xmm); i++) {
|
508 | 505 | if ((bits & (0x1 << i))) {
|
509 | 506 | *to++ = xmm[i];
|
|
0 commit comments