Skip to content

Commit 66ac1fd

Browse files
committed
use zend_simd.h in ZendAccelerator.c
1 parent 2e4dfd5 commit 66ac1fd

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed

Zend/zend_simd.h

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
+----------------------------------------------------------------------+
3+
| Zend Engine |
4+
+----------------------------------------------------------------------+
5+
| Copyright (c) Zend Technologies Ltd. (http://www.zend.com) |
6+
+----------------------------------------------------------------------+
7+
| This source file is subject to version 2.00 of the Zend license, |
8+
| that is bundled with this package in the file LICENSE, and is |
9+
| available through the world-wide-web at the following url: |
10+
| http://www.zend.com/license/2_00.txt. |
11+
| If you did not receive a copy of the Zend license and are unable to |
12+
| obtain it through the world-wide-web, please send a note to |
13+
| license@zend.com so we can mail you a copy immediately. |
14+
+----------------------------------------------------------------------+
15+
| Authors: Saki Takamachi <[email protected]> |
16+
+----------------------------------------------------------------------+
17+
*/
18+
19+
#ifndef ZEND_SIMD_H
20+
#define ZEND_SIMD_H
21+
22+
#if defined(__SSE2__) || defined(__aarch64__) || defined(_M_ARM64)
23+
24+
#define ZEND_HAVE_SIMD
25+
26+
#ifdef __SSE2__
27+
#include <emmintrin.h>
28+
29+
typedef __m128i zend_vec_8x16_t;
30+
typedef __m128i zend_vec_16x8_t;
31+
typedef __m128i zend_vec_32x4_t;
32+
typedef __m128i zend_vec_64x2_t;
33+
34+
#define zend_vec_setzero_8x16() _mm_setzero_si128()
35+
#define zend_vec_set_8x16(x) _mm_set1_epi8(x)
36+
/* Arguments run from the highest 16-bit lane down: x0 lands in lane 7 and x7 in lane 0. */
#define zend_vec_set_8x16_from_16x8(x0, x1, x2, x3, x4, x5, x6, x7) _mm_set_epi16(x0, x1, x2, x3, x4, x5, x6, x7)
37+
/* Arguments run from the highest 32-bit lane down: x0 lands in lane 3 and x3 in lane 0. */
#define zend_vec_set_8x16_from_32x4(x0, x1, x2, x3) _mm_set_epi32(x0, x1, x2, x3)
38+
/*
 * Build a vector from two 64-bit integers: x0 becomes the upper 64 bits and
 * x1 the lower 64 bits. _mm_set_epi64x() is used because _mm_set_epi64()
 * takes MMX __m64 operands rather than integers.
 */
#define zend_vec_set_8x16_from_64x2(x0, x1) _mm_set_epi64x((long long) (x0), (long long) (x1))
39+
#define zend_vec_load_8x16(x) _mm_load_si128((const __m128i *) (x))
40+
#define zend_vec_loadu_8x16(x) _mm_loadu_si128((const __m128i *) (x))
41+
#define zend_vec_store_8x16(to, x) _mm_store_si128((__m128i *) (to), x)
42+
#define zend_vec_storeu_8x16(to, x) _mm_storeu_si128((__m128i *) (to), x)
43+
44+
#define zend_vec_or_8x16(a, b) _mm_or_si128(a, b)
45+
#define zend_vec_xor_8x16(a, b) _mm_xor_si128(a, b)
46+
#define zend_vec_and_8x16(a, b) _mm_and_si128(a, b)
47+
#define zend_vec_rshift_128_from_8x16(x, bytes) _mm_srli_si128(x, bytes)
48+
#define zend_vec_lshift_128_from_8x16(x, bytes) _mm_slli_si128(x, bytes)
49+
50+
#define zend_vec_add_8x16(a, b) _mm_add_epi8(a, b)
51+
52+
#define zend_vec_cmpeq_8x16(a, b) _mm_cmpeq_epi8(a, b)
53+
#define zend_vec_cmplt_8x16(a, b) _mm_cmplt_epi8(a, b)
54+
#define zend_vec_cmpgt_8x16(a, b) _mm_cmpgt_epi8(a, b)
55+
56+
#define zend_vec_movemask_8x16(x) _mm_movemask_epi8(x)
57+
58+
59+
#elif defined(__aarch64__) || defined(_M_ARM64)
60+
#include <arm_neon.h>
61+
62+
typedef int8x16_t zend_vec_8x16_t;
63+
typedef int16x8_t zend_vec_16x8_t;
64+
typedef int32x4_t zend_vec_32x4_t;
65+
typedef int64x2_t zend_vec_64x2_t;
66+
67+
#define zend_vec_setzero_8x16() vdupq_n_s8(0)
68+
#define zend_vec_set_8x16(x) vdupq_n_s8(x)
69+
#define zend_vec_set_8x16_from_16x8(x0, x1, x2, x3, x4, x5, x6, x7) \
70+
vreinterpretq_s8_s16((int16x8_t) { \
71+
(int16_t) (x7), (int16_t) (x6), (int16_t) (x5), (int16_t) (x4), \
72+
(int16_t) (x3), (int16_t) (x2), (int16_t) (x1), (int16_t) (x0) })
73+
#define zend_vec_set_8x16_from_32x4(x0, x1, x2, x3) \
74+
vreinterpretq_s8_s32((int32x4_t) { (int32_t) (x3), (int32_t) (x2), (int32_t) (x1), (int32_t) (x0) })
75+
#define zend_vec_set_8x16_from_64x2(x0, x1) vreinterpretq_s8_s64((int64x2_t) { (int64_t) (x1), (int64_t) (x0) })
76+
#define zend_vec_load_8x16(x) vld1q_s8((const int8_t *) (x))
77+
#define zend_vec_loadu_8x16(x) zend_vec_load_8x16(x)
78+
#define zend_vec_store_8x16(to, x) vst1q_s8((int8_t *) (to), x)
79+
#define zend_vec_storeu_8x16(to, x) zend_vec_store_8x16(to, x)
80+
81+
#define zend_vec_or_8x16(a, b) vorrq_s8(a, b)
82+
#define zend_vec_xor_8x16(a, b) veorq_s8(a, b)
83+
#define zend_vec_and_8x16(a, b) vandq_s8(a, b)
84+
/*
 * Shift the whole 128-bit vector right by `bytes` bytes, filling the vacated
 * high lanes with zeros (same result as _mm_srli_si128()).
 * vextq_u8(a, b, n) yields lanes n..15 of a followed by lanes 0..n-1 of b,
 * so x has to be the first operand and the zero vector the second.
 * `bytes` must be a compile-time constant in the range 0..15.
 */
#define zend_vec_rshift_128_from_8x16(x, bytes) vreinterpretq_s8_u8(vextq_u8(vreinterpretq_u8_s8(x), vdupq_n_u8(0), (bytes)))
85+
/*
 * Shift the whole 128-bit vector left by `bytes` bytes, filling the vacated
 * low lanes with zeros (same result as _mm_slli_si128()).
 * vextq_u8(a, b, n) yields lanes n..15 of a followed by lanes 0..n-1 of b,
 * so the zero vector has to come first and x second, with n = 16 - bytes.
 * `bytes` must be a compile-time constant in the range 1..16.
 */
#define zend_vec_lshift_128_from_8x16(x, bytes) vreinterpretq_s8_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_s8(x), 16 - (bytes)))
86+
87+
#define zend_vec_add_8x16(a, b) vaddq_s8(a, b)
88+
89+
#define zend_vec_cmpeq_8x16(a, b) (vreinterpretq_s8_u8(vceqq_s8(a, b)))
90+
#define zend_vec_cmplt_8x16(a, b) (vreinterpretq_s8_u8(vcltq_s8(a, b)))
91+
#define zend_vec_cmpgt_8x16(a, b) (vreinterpretq_s8_u8(vcgtq_s8(a, b)))
92+
93+
/*
 * NEON replacement for _mm_movemask_epi8(): packs the top bit of each of the
 * sixteen byte lanes of x into an int, lane 0 in bit 0 through lane 15 in
 * bit 15.
 */
static zend_always_inline int zend_vec_movemask_8x16(int8x16_t x)
{
	/* Keep only the sign bit of every byte, moved down to bit 0. */
	uint8x16_t msb = vshrq_n_u8(vreinterpretq_u8_s8(x), 7);

	/*
	 * Each shift-right-and-accumulate adds the upper half of a lane, shifted
	 * down, onto the lane itself, so the bits gathered so far end up side by
	 * side in the lane's lowest byte: 2 bits per 16-bit lane, then 4 per
	 * 32-bit lane, then 8 per 64-bit lane.
	 */
	uint16x8_t w16 = vreinterpretq_u16_u8(msb);
	uint32x4_t w32 = vreinterpretq_u32_u16(vsraq_n_u16(w16, w16, 7));
	uint64x2_t w64 = vreinterpretq_u64_u32(vsraq_n_u32(w32, w32, 14));
	uint8x16_t packed = vreinterpretq_u8_u64(vsraq_n_u64(w64, w64, 28));

	/* Byte 0 carries lanes 0-7 and byte 8 carries lanes 8-15. */
	int low = vgetq_lane_u8(packed, 0);
	int high = vgetq_lane_u8(packed, 8);
	return low | (high << 8);
}
101+
102+
#endif
103+
104+
#endif /* defined(__SSE2__) || defined(__aarch64__) || defined(_M_ARM64) */
105+
106+
#endif /* ZEND_SIMD_H */

0 commit comments

Comments
 (0)