|
42 | 42 | #include "oops/oop.inline.hpp"
|
43 | 43 | #include "prims/methodHandles.hpp"
|
44 | 44 | #include "prims/upcallLinker.hpp"
|
| 45 | +#include "runtime/arguments.hpp" |
45 | 46 | #include "runtime/atomic.hpp"
|
46 | 47 | #include "runtime/continuation.hpp"
|
47 | 48 | #include "runtime/continuationEntry.inline.hpp"
|
@@ -8176,6 +8177,78 @@ class StubGenerator: public StubCodeGenerator {
|
8176 | 8177 | // }
|
8177 | 8178 | };
|
8178 | 8179 |
|
| 8180 | + void generate_vector_math_stubs() { |
| 8181 | + // Get native vector math stub routine addresses |
| 8182 | + void* libsleef = nullptr; |
| 8183 | + char ebuf[1024]; |
| 8184 | + char dll_name[JVM_MAXPATHLEN]; |
| 8185 | + if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) { |
| 8186 | + libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf); |
| 8187 | + } |
| 8188 | + if (libsleef == nullptr) { |
| 8189 | + log_info(library)("Failed to load native vector math library, %s!", ebuf); |
| 8190 | + return; |
| 8191 | + } |
| 8192 | + // Method naming convention |
| 8193 | + // All the methods are named as <OP><T><N>_<U><suffix> |
| 8194 | + // Where: |
| 8195 | + // <OP> is the operation name, e.g. sin |
| 8196 | + // <T> is optional to indicate float/double |
| 8197 | + // "f/d" for vector float/double operation |
| 8198 | + // <N> is the number of elements in the vector |
| 8199 | + // "2/4" for neon, and "x" for sve |
| 8200 | + // <U> is the precision level |
| 8201 | + // "u10/u05" represents 1.0/0.5 ULP error bounds |
| 8202 | + // We use "u10" for all operations by default |
| 8203 | + // But for those functions do not have u10 support, we use "u05" instead |
| 8204 | + // <suffix> indicates neon/sve |
| 8205 | + // "sve/advsimd" for sve/neon implementations |
| 8206 | + // e.g. sinfx_u10sve is the method for computing vector float sin using SVE instructions |
| 8207 | + // cosd2_u10advsimd is the method for computing 2 elements vector double cos using NEON instructions |
| 8208 | + // |
| 8209 | + log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef)); |
| 8210 | + |
| 8211 | + // Math vector stubs implemented with SVE for scalable vector size. |
| 8212 | + if (UseSVE > 0) { |
| 8213 | + for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { |
| 8214 | + int vop = VectorSupport::VECTOR_OP_MATH_START + op; |
| 8215 | + // Skip "tanh" because there is performance regression |
| 8216 | + if (vop == VectorSupport::VECTOR_OP_TANH) { |
| 8217 | + continue; |
| 8218 | + } |
| 8219 | + |
| 8220 | + // The native library does not support u10 level of "hypot". |
| 8221 | + const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; |
| 8222 | + |
| 8223 | + snprintf(ebuf, sizeof(ebuf), "%sfx_%ssve", VectorSupport::mathname[op], ulf); |
| 8224 | + StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); |
| 8225 | + |
| 8226 | + snprintf(ebuf, sizeof(ebuf), "%sdx_%ssve", VectorSupport::mathname[op], ulf); |
| 8227 | + StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); |
| 8228 | + } |
| 8229 | + } |
| 8230 | + |
| 8231 | + // Math vector stubs implemented with NEON for 64/128 bits vector size. |
| 8232 | + for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { |
| 8233 | + int vop = VectorSupport::VECTOR_OP_MATH_START + op; |
| 8234 | + // Skip "tanh" because there is performance regression |
| 8235 | + if (vop == VectorSupport::VECTOR_OP_TANH) { |
| 8236 | + continue; |
| 8237 | + } |
| 8238 | + |
| 8239 | + // The native library does not support u10 level of "hypot". |
| 8240 | + const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; |
| 8241 | + |
| 8242 | + snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf); |
| 8243 | + StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libsleef, ebuf); |
| 8244 | + |
| 8245 | + snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf); |
| 8246 | + StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf); |
| 8247 | + |
| 8248 | + snprintf(ebuf, sizeof(ebuf), "%sd2_%sadvsimd", VectorSupport::mathname[op], ulf); |
| 8249 | + StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf); |
| 8250 | + } |
| 8251 | + } |
8179 | 8252 |
|
8180 | 8253 | // Initialization
|
8181 | 8254 | void generate_initial_stubs() {
|
@@ -8329,6 +8402,9 @@ class StubGenerator: public StubCodeGenerator {
|
8329 | 8402 | // because it's faster for the sizes of modulus we care about.
|
8330 | 8403 | StubRoutines::_montgomerySquare = g.generate_multiply();
|
8331 | 8404 | }
|
| 8405 | + |
| 8406 | + generate_vector_math_stubs(); |
| 8407 | + |
8332 | 8408 | #endif // COMPILER2
|
8333 | 8409 |
|
8334 | 8410 | if (UseChaCha20Intrinsics) {
|
@@ -8384,6 +8460,7 @@ class StubGenerator: public StubCodeGenerator {
|
8384 | 8460 | if (UseAdler32Intrinsics) {
|
8385 | 8461 | StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
|
8386 | 8462 | }
|
| 8463 | + |
8387 | 8464 | #endif // COMPILER2_OR_JVMCI
|
8388 | 8465 | }
|
8389 | 8466 |
|
|
0 commit comments