
Commit ae176af

Author: Natalia Gimelshein (committed)

fixes for pytorch#9435

1 parent: 30c0373

File tree

1 file changed (+6, -6 lines)


aten/src/ATen/native/cuda/Dropout.cu

Lines changed: 6 additions & 6 deletions
@@ -98,10 +98,10 @@ fused_dropout_cuda(const Tensor& self, double p, Generator * gen){
   Tensor mask = self.type().toScalarType(kByte).tensor(self.sizes());
   const int64_t nelem = self.numel();
   int64_t block_size = 256;
-  unsigned int blocks_per_sm = at::globalContext().getCurrentDeviceProperties()->maxThreadsPerMultiProcessor/block_size;
+  unsigned int blocks_per_sm = at::cuda::getCurrentDeviceProperties()->maxThreadsPerMultiProcessor/block_size;
   dim3 dim_block(block_size);
   dim3 grid((nelem + block_size -1)/block_size);
-  grid.x = std::min((unsigned int)at::globalContext().getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
+  grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
   int64_t nrep = ((nelem - 1)/(block_size*grid.x*UNROLL)+1)*UNROLL;
   if (cuda::detail::canUse32BitIndexMath(self)){
     AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.type(), "fused_dropout", [&] {
@@ -115,10 +115,10 @@ fused_dropout_cuda(const Tensor& self, double p, Generator * gen){
       mask_info.collapseDims(); //ret and mask are collapsed to 1d contiguous tensor
       switch (self_info.dims) {
         case 1:
-            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, 1><<<grid, dim_block, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, 1><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
             break;
         default:
-            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, -1><<<dim_block, grid, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, -1><<<dim_block, grid, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
       }
     });
   } else {
@@ -133,10 +133,10 @@ fused_dropout_cuda(const Tensor& self, double p, Generator * gen){
       mask_info.collapseDims(); //ret and mask are collapsed to 1d contiguous tensor
       switch (self_info.dims) {
        case 1:
-            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, 1><<<dim_block, grid, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, 1><<<dim_block, grid, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
           break;
        default:
-            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, -1><<<dim_block, grid, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, -1><<<dim_block, grid, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
       }
     });
   }
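Context for the lines this patch touches (not part of the commit): fused_dropout_cuda computes an occupancy-capped launch configuration, then launches the kernel on the current CUDA stream; the patch only swaps the globalContext() accessors for their at::cuda equivalents. The sketch below reproduces the same grid-sizing pattern against the plain CUDA runtime API instead of ATen's at::cuda helpers; the kernel, the element count, and the use of the default stream are placeholders for illustration only.

// Illustrative sketch only: occupancy-capped grid sizing as in Dropout.cu,
// using cudaGetDeviceProperties() in place of at::cuda::getCurrentDeviceProperties().
#include <cuda_runtime.h>
#include <algorithm>
#include <cstdint>
#include <cstdio>

constexpr int UNROLL = 4;  // Dropout.cu processes UNROLL elements per thread

__global__ void dummy_kernel(const float* in, float* out, int64_t nelem) {
  int64_t idx = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
  // grid-stride loop so the clamped grid still covers every element
  for (int64_t i = idx; i < nelem; i += (int64_t)blockDim.x * gridDim.x) {
    out[i] = in[i];
  }
}

int main() {
  const int64_t nelem = 1 << 20;     // placeholder element count
  const int64_t block_size = 256;

  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, /*device=*/0);

  // How many 256-thread blocks fit on one SM, then cap the grid at
  // (SM count * blocks_per_sm) blocks -- the clamp applied in fused_dropout_cuda.
  unsigned int blocks_per_sm = prop.maxThreadsPerMultiProcessor / block_size;
  dim3 dim_block(block_size);
  dim3 grid((nelem + block_size - 1) / block_size);
  grid.x = std::min((unsigned int)prop.multiProcessorCount * blocks_per_sm, grid.x);

  // Each thread then covers nrep elements in chunks of UNROLL, as in the diff.
  int64_t nrep = ((nelem - 1) / (block_size * grid.x * UNROLL) + 1) * UNROLL;
  std::printf("grid.x=%u nrep=%lld\n", grid.x, (long long)nrep);

  float *in = nullptr, *out = nullptr;
  cudaMalloc(&in, nelem * sizeof(float));
  cudaMalloc(&out, nelem * sizeof(float));
  dummy_kernel<<<grid, dim_block, 0, /*stream=*/0>>>(in, out, nelem);
  cudaDeviceSynchronize();
  cudaFree(in);
  cudaFree(out);
  return 0;
}

The clamp keeps the grid at roughly one wave of resident blocks per SM, and nrep tells the kernel how many elements each thread must consume so the smaller grid still covers the whole tensor.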
