From e96c9c48c06c944f89b7a5f02e0faf4ee645f326 Mon Sep 17 00:00:00 2001 From: Lukicheva Polina Date: Thu, 7 Oct 2021 05:01:05 -0500 Subject: [PATCH 1/4] Fix race condition in dpnp_rng_vonmises_small_kappa_c and dpnp_rng_vonmises_large_kappa_c --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 64 ++++++++++++++--------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 189952af2d09..30c470538864 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1261,7 +1261,8 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da _DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; - size_t* n = nullptr; + bool* mask_checker = nullptr; + bool* mask = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; assert(kappa > 1.0); @@ -1277,19 +1278,23 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); - for (*n = 0; *n < size;) + + mask_checker = (bool*)dpnp_memory_alloc_c(1 * sizeof(bool)); + mask_checker[0] = false; + mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); + dpnp_full_c(mask_checker, mask, size); + + while(!mask_checker[0]) { - size_t diff_size = size - *n; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI); - auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); + auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); - auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); + auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec); - cl::sycl::range<1> diff_gws(diff_size); + cl::sycl::range<1> gws(size); auto paral_kernel_some = [&](cl::sycl::handler& cgh) { cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; _DataType sn, cn, sn2, cn2; @@ -1304,23 +1309,27 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); Y = kappa * (s_minus_one + neg_W_minus_one); - if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + if (((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) && !mask[i]) { Y = neg_W_minus_one * (2 - neg_W_minus_one); if (Y < 0) Y = 0.0; else if (Y > 1.0) Y = 1.0; - *n = *n + 1; - result1[*n] = cl::sycl::asin(cl::sycl::sqrt(Y)); + + result1[i] = cl::sycl::asin(cl::sycl::sqrt(Y)); + mask[i] = true; } }); }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); + + dpnp_all_c(mask, mask_checker, size); } dpnp_memory_free_c(Uvec); - dpnp_memory_free_c(n); + dpnp_memory_free_c(mask_checker); + dpnp_memory_free_c(mask); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); @@ -1359,7 +1368,8 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da _DataType rho_over_kappa, rho, r, s_kappa; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; - size_t* n = nullptr; + bool* mask_checker = nullptr; + bool* mask = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; @@ -1374,39 +1384,45 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); - for (*n = 0; *n < size;) + mask_checker = (bool*)dpnp_memory_alloc_c(1 * sizeof(bool)); + mask_checker[0] = false; + mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); + dpnp_full_c(mask_checker, mask, size); + + while (!mask_checker[0]) { - size_t diff_size = size - *n; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI); - auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); + auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); - auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); + auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec); - cl::sycl::range<1> diff_gws((diff_size)); + cl::sycl::range<1> gws((size)); auto paral_kernel_some = [&](cl::sycl::handler& cgh) { cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; _DataType Z, W, Y, V; Z = cl::sycl::cos(Uvec[i]); V = Vvec[i]; W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); Y = s_kappa - kappa * W; - if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + if (((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) && !mask[i]) { - *n = *n + 1; - result1[*n] = cl::sycl::acos(W); + result1[i] = cl::sycl::acos(W); + mask[i] = true; } }); }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); + + dpnp_all_c(mask, mask_checker, size); } dpnp_memory_free_c(Uvec); - dpnp_memory_free_c(n); + dpnp_memory_free_c(mask_checker); + dpnp_memory_free_c(mask); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); From b467566075d8416acb13eb7fa94c2b0518eab7f8 Mon Sep 17 00:00:00 2001 From: Lukicheva Polina Date: Thu, 7 Oct 2021 07:11:07 -0500 Subject: [PATCH 2/4] Rename arrays and change if condition from kernels in dpnp_rng_vonmises_large_kappa_c and dpnp_rng_vonmises_small_kappa_c --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 101 +++++++++++----------- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 7bb73124d63d..9bda5072ec99 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1261,8 +1261,8 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da _DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; - bool* mask_checker = nullptr; - bool* mask = nullptr; + bool* result_ready = nullptr; + bool* result_mask = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; assert(kappa > 1.0); @@ -1279,12 +1279,12 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - mask_checker = (bool*)dpnp_memory_alloc_c(1 * sizeof(bool)); - mask_checker[0] = false; - mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); - dpnp_full_c(mask_checker, mask, size); + result_ready = reinterpret_cast(dpnp_memory_alloc_c(1 * sizeof(bool))); + result_ready[0] = false; + result_mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); + dpnp_full_c(result_ready, result_mask, size); - while(!mask_checker[0]) + while(!result_ready[0]) { mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI); auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec); @@ -1296,40 +1296,41 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; + if (!result_mask[0]){ + _DataType sn, cn, sn2, cn2; + _DataType neg_W_minus_one, V, Y; - _DataType sn, cn, sn2, cn2; - _DataType neg_W_minus_one, V, Y; + sn = cl::sycl::sin(Uvec[i]); + cn = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + sn2 = sn * sn; + cn2 = cn * cn; - sn = cl::sycl::sin(Uvec[i]); - cn = cl::sycl::cos(Uvec[i]); - V = Vvec[i]; - sn2 = sn * sn; - cn2 = cn * cn; + neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); + Y = kappa * (s_minus_one + neg_W_minus_one); - neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); - Y = kappa * (s_minus_one + neg_W_minus_one); - - if (((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) && !mask[i]) - { - Y = neg_W_minus_one * (2 - neg_W_minus_one); - if (Y < 0) - Y = 0.0; - else if (Y > 1.0) - Y = 1.0; - - result1[i] = cl::sycl::asin(cl::sycl::sqrt(Y)); - mask[i] = true; + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + Y = neg_W_minus_one * (2 - neg_W_minus_one); + if (Y < 0) + Y = 0.0; + else if (Y > 1.0) + Y = 1.0; + + result1[i] = cl::sycl::asin(cl::sycl::sqrt(Y)); + result_mask[i] = true; + } } }); }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); - dpnp_all_c(mask, mask_checker, size); + dpnp_all_c(result_mask, result_ready, size); } dpnp_memory_free_c(Uvec); - dpnp_memory_free_c(mask_checker); - dpnp_memory_free_c(mask); + dpnp_memory_free_c(result_ready); + dpnp_memory_free_c(result_mask); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); @@ -1368,8 +1369,8 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da _DataType rho_over_kappa, rho, r, s_kappa; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; - bool* mask_checker = nullptr; - bool* mask = nullptr; + bool* result_ready = nullptr; + bool* result_mask = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; @@ -1385,12 +1386,12 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - mask_checker = (bool*)dpnp_memory_alloc_c(1 * sizeof(bool)); - mask_checker[0] = false; - mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); - dpnp_full_c(mask_checker, mask, size); + result_ready = reinterpret_cast(dpnp_memory_alloc_c(1 * sizeof(bool))); + result_ready[0] = false; + result_mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); + dpnp_full_c(result_ready, result_mask, size); - while (!mask_checker[0]) + while (!result_ready[0]) { mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI); auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec); @@ -1403,26 +1404,28 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; - _DataType Z, W, Y, V; - Z = cl::sycl::cos(Uvec[i]); - V = Vvec[i]; - W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); - Y = s_kappa - kappa * W; - if (((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) && !mask[i]) - { - result1[i] = cl::sycl::acos(W); - mask[i] = true; + if (!result_mask[0]) { + _DataType Z, W, Y, V; + Z = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); + Y = s_kappa - kappa * W; + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + result1[i] = cl::sycl::acos(W); + result_mask[i] = true; + } } }); }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); - dpnp_all_c(mask, mask_checker, size); + dpnp_all_c(result_mask, result_ready, size); } dpnp_memory_free_c(Uvec); - dpnp_memory_free_c(mask_checker); - dpnp_memory_free_c(mask); + dpnp_memory_free_c(result_ready); + dpnp_memory_free_c(result_mask); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); From e660f721687cdc505a84ca17efb6a835fa52aa1f Mon Sep 17 00:00:00 2001 From: Lukicheva Polina Date: Thu, 7 Oct 2021 07:58:44 -0500 Subject: [PATCH 3/4] Add space --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 9bda5072ec99..3fdd364d450a 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1296,7 +1296,7 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; - if (!result_mask[0]){ + if (!result_mask[0]) { _DataType sn, cn, sn2, cn2; _DataType neg_W_minus_one, V, Y; From ec7efa46272121b4b3fd604bc8737076621c0459 Mon Sep 17 00:00:00 2001 From: Lukicheva Polina Date: Thu, 7 Oct 2021 08:03:22 -0500 Subject: [PATCH 4/4] Fix indices in dpnp_rng_vonmises_small_kappa_c and dpnp_rng_vonmises_large_kappa_c --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 3fdd364d450a..a205e98a6b93 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1296,7 +1296,7 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; - if (!result_mask[0]) { + if (!result_mask[i]) { _DataType sn, cn, sn2, cn2; _DataType neg_W_minus_one, V, Y; @@ -1404,7 +1404,7 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; - if (!result_mask[0]) { + if (!result_mask[i]) { _DataType Z, W, Y, V; Z = cl::sycl::cos(Uvec[i]); V = Vvec[i];