@@ -98,10 +98,10 @@ fused_dropout_cuda(const Tensor& self, double p, Generator * gen){
   Tensor mask = self.type().toScalarType(kByte).tensor(self.sizes());
   const int64_t nelem = self.numel();
   int64_t block_size = 256;
-  unsigned int blocks_per_sm = at::globalContext().getCurrentDeviceProperties()->maxThreadsPerMultiProcessor/block_size;
+  unsigned int blocks_per_sm = at::cuda::getCurrentDeviceProperties()->maxThreadsPerMultiProcessor/block_size;
   dim3 dim_block(block_size);
   dim3 grid((nelem + block_size - 1)/block_size);
-  grid.x = std::min((unsigned int)at::globalContext().getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
+  grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
   int64_t nrep = ((nelem - 1)/(block_size*grid.x*UNROLL)+1)*UNROLL;
   if (cuda::detail::canUse32BitIndexMath(self)){
     AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.type(), "fused_dropout", [&] {
@@ -115,10 +115,10 @@ fused_dropout_cuda(const Tensor& self, double p, Generator * gen){
       mask_info.collapseDims(); // ret and mask are collapsed to 1d contiguous tensor
       switch (self_info.dims) {
         case 1:
-            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, 1><<<grid, dim_block, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, 1><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
            break;
        default:
-            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, -1><<<grid, dim_block, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, unsigned int, -1><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
       }
     });
   } else {
@@ -133,10 +133,10 @@ fused_dropout_cuda(const Tensor& self, double p, Generator * gen){
      mask_info.collapseDims(); // ret and mask are collapsed to 1d contiguous tensor
      switch (self_info.dims) {
        case 1:
-            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, 1><<<grid, dim_block, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, 1><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
           break;
       default:
-            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, -1><<<grid, dim_block, 0, globalContext().getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
+            fused_dropout_kernel<scalar_t, accscalar_t, uint64_t, -1><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(self_info, ret_info, mask_info, nelem, pa, next_philox_seed(gen,nrep));
      }
    });
   }
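
The launch configuration in the first hunk deliberately caps the grid at blocks_per_sm * multiProcessorCount instead of launching one block per 256-element chunk, which only works if each thread then loops over several elements. Below is a minimal, self-contained sketch of that pattern using the plain CUDA runtime API; cudaGetDeviceProperties stands in for at::cuda::getCurrentDeviceProperties, and scale_kernel plus the UNROLL value are illustrative, not the PyTorch dropout kernel.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

constexpr int UNROLL = 4; // assumed value; the diff's UNROLL is defined elsewhere

__global__ void scale_kernel(const float* in, float* out, int64_t nelem, float scale) {
  // Grid-stride loop: each thread visits several elements, so a grid capped
  // below ceil(nelem / blockDim.x) blocks still covers the whole tensor.
  for (int64_t i = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
       i < nelem;
       i += (int64_t)gridDim.x * blockDim.x) {
    out[i] = in[i] * scale;
  }
}

int main() {
  const int64_t nelem = 1 << 20;
  const int block_size = 256;

  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, /*device=*/0);

  // Same arithmetic as the diff: enough resident blocks to fill every SM,
  // but never more blocks than there are block_size-element chunks.
  unsigned int blocks_per_sm = prop.maxThreadsPerMultiProcessor / block_size;
  dim3 dim_block(block_size);
  dim3 grid((unsigned int)((nelem + block_size - 1) / block_size));
  grid.x = std::min((unsigned int)prop.multiProcessorCount * blocks_per_sm, grid.x);

  // Mirrors the diff's nrep: per-thread element count rounded up to a multiple
  // of UNROLL; the diff passes this to next_philox_seed, presumably to reserve
  // enough random numbers for each thread.
  int64_t nrep = ((nelem - 1) / ((int64_t)block_size * grid.x * UNROLL) + 1) * UNROLL;
  printf("grid=%u block=%u nrep=%lld\n", grid.x, dim_block.x, (long long)nrep);

  float *in = nullptr, *out = nullptr;
  cudaMalloc(&in, nelem * sizeof(float));
  cudaMalloc(&out, nelem * sizeof(float));
  scale_kernel<<<grid, dim_block>>>(in, out, nelem, 0.5f);
  cudaDeviceSynchronize();
  cudaFree(in);
  cudaFree(out);
  return 0;
}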
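The second and third hunks make the same substitution on the stream argument, once per index-type branch: canUse32BitIndexMath picks 32-bit index arithmetic for small tensors and falls back to 64-bit otherwise, and each branch launches the kernel on the stream returned by the new accessor. The sketch below shows how those two pieces fit together, assuming a PyTorch build where ATen/cuda/CUDAContext.h provides the moved helpers; fill_kernel and launch_fill are hypothetical names, not ATen functions.

#include <ATen/cuda/CUDAContext.h>
#include <cstdint>
#include <limits>

template <typename index_t>
__global__ void fill_kernel(float* out, index_t nelem, float value) {
  // Grid-stride loop, so an occupancy-capped grid still covers all elements.
  for (index_t i = blockIdx.x * (index_t)blockDim.x + threadIdx.x;
       i < nelem; i += (index_t)gridDim.x * blockDim.x) {
    out[i] = value;
  }
}

void launch_fill(float* out, int64_t nelem, float value, dim3 grid, dim3 block) {
  // New-style accessor, as introduced by this diff; CUDAStream converts
  // implicitly to cudaStream_t for the fourth launch parameter.
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  if (nelem <= std::numeric_limits<int32_t>::max()) {
    // Small tensors: cheaper 32-bit index arithmetic.
    fill_kernel<uint32_t><<<grid, block, 0, stream>>>(out, (uint32_t)nelem, value);
  } else {
    fill_kernel<uint64_t><<<grid, block, 0, stream>>>(out, (uint64_t)nelem, value);
  }
}

Templating on the index type, rather than always using 64-bit offsets, trades a second kernel instantiation for faster address arithmetic in the common small-tensor case, which is the same trade the dropout code makes with its unsigned int and uint64_t instantiations.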