Skip to content

Plukiche/vonmisses random #998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

111 changes: 65 additions & 46 deletions dpnp/backend/kernels/dpnp_krnl_random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1261,7 +1261,8 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da
_DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one;
_DataType* Uvec = nullptr;
_DataType* Vvec = nullptr;
size_t* n = nullptr;
bool* result_ready = nullptr;
bool* result_mask = nullptr;
const _DataType d_zero = 0.0, d_one = 1.0;

assert(kappa > 1.0);
Expand All @@ -1277,50 +1278,59 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da

Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
n = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(sizeof(size_t)));
for (*n = 0; *n < size;)

result_ready = reinterpret_cast<bool*>(dpnp_memory_alloc_c(1 * sizeof(bool)));
result_ready[0] = false;
result_mask = reinterpret_cast<bool*>(dpnp_memory_alloc_c(size * sizeof(bool)));
dpnp_full_c<bool>(result_ready, result_mask, size);

while(!result_ready[0])
{
size_t diff_size = size - *n;
mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI);
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec);
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec);
mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one);
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec);
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec);

cl::sycl::range<1> diff_gws(diff_size);
cl::sycl::range<1> gws(size);
auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event});
cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) {
cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) {
size_t i = global_id[0];
if (!result_mask[i]) {
_DataType sn, cn, sn2, cn2;
_DataType neg_W_minus_one, V, Y;

_DataType sn, cn, sn2, cn2;
_DataType neg_W_minus_one, V, Y;

sn = cl::sycl::sin(Uvec[i]);
cn = cl::sycl::cos(Uvec[i]);
V = Vvec[i];
sn2 = sn * sn;
cn2 = cn * cn;
sn = cl::sycl::sin(Uvec[i]);
cn = cl::sycl::cos(Uvec[i]);
V = Vvec[i];
sn2 = sn * sn;
cn2 = cn * cn;

neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
Y = kappa * (s_minus_one + neg_W_minus_one);
neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
Y = kappa * (s_minus_one + neg_W_minus_one);

if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
{
Y = neg_W_minus_one * (2 - neg_W_minus_one);
if (Y < 0)
Y = 0.0;
else if (Y > 1.0)
Y = 1.0;
*n = *n + 1;
result1[*n] = cl::sycl::asin(cl::sycl::sqrt(Y));
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
{
Y = neg_W_minus_one * (2 - neg_W_minus_one);
if (Y < 0)
Y = 0.0;
else if (Y > 1.0)
Y = 1.0;

result1[i] = cl::sycl::asin(cl::sycl::sqrt(Y));
result_mask[i] = true;
}
}
});
};
auto some_event = DPNP_QUEUE.submit(paral_kernel_some);
some_event.wait();

dpnp_all_c<bool, bool>(result_mask, result_ready, size);
}
dpnp_memory_free_c(Uvec);
dpnp_memory_free_c(n);
dpnp_memory_free_c(result_ready);
dpnp_memory_free_c(result_mask);

mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one);
auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);
Expand Down Expand Up @@ -1359,7 +1369,8 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da
_DataType rho_over_kappa, rho, r, s_kappa;
_DataType* Uvec = nullptr;
_DataType* Vvec = nullptr;
size_t* n = nullptr;
bool* result_ready = nullptr;
bool* result_mask = nullptr;

const _DataType d_zero = 0.0, d_one = 1.0;

Expand All @@ -1374,39 +1385,47 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da

Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
n = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(sizeof(size_t)));

for (*n = 0; *n < size;)
result_ready = reinterpret_cast<bool*>(dpnp_memory_alloc_c(1 * sizeof(bool)));
result_ready[0] = false;
result_mask = reinterpret_cast<bool*>(dpnp_memory_alloc_c(size * sizeof(bool)));
dpnp_full_c<bool>(result_ready, result_mask, size);

while (!result_ready[0])
{
size_t diff_size = size - *n;
mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI);
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec);
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec);
mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one);
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec);
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec);

cl::sycl::range<1> diff_gws((diff_size));
cl::sycl::range<1> gws((size));

auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event});
cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) {
cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) {
size_t i = global_id[0];
_DataType Z, W, Y, V;
Z = cl::sycl::cos(Uvec[i]);
V = Vvec[i];
W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
Y = s_kappa - kappa * W;
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
{
*n = *n + 1;
result1[*n] = cl::sycl::acos(W);
if (!result_mask[i]) {
_DataType Z, W, Y, V;
Z = cl::sycl::cos(Uvec[i]);
V = Vvec[i];
W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
Y = s_kappa - kappa * W;
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
{
result1[i] = cl::sycl::acos(W);
result_mask[i] = true;
}
}
});
};
auto some_event = DPNP_QUEUE.submit(paral_kernel_some);
some_event.wait();

dpnp_all_c<bool, bool>(result_mask, result_ready, size);
}
dpnp_memory_free_c(Uvec);
dpnp_memory_free_c(n);
dpnp_memory_free_c(result_ready);
dpnp_memory_free_c(result_mask);

mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one);
auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);
Expand Down