|
5 | 5 | #include <THC/THCAtomics.cuh>
|
6 | 6 | #include <THC/THCDeviceUtils.cuh>
|
7 | 7 |
|
8 |
| -// TODO make it in a common file |
9 |
| -#define CUDA_1D_KERNEL_LOOP(i, n) \ |
10 |
| - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ |
11 |
| - i += blockDim.x * gridDim.x) |
| 8 | +#include "cuda_helpers.h" |
12 | 9 |
|
13 | 10 |
|
14 | 11 | template <typename T>
|
@@ -48,7 +45,7 @@ __device__ T bilinear_interpolate(const T* input,
|
48 | 45 | T ly = y - y_low;
|
49 | 46 | T lx = x - x_low;
|
50 | 47 | T hy = 1. - ly, hx = 1. - lx;
|
51 |
| - |
| 48 | + |
52 | 49 | // do bilinear interpolation
|
53 | 50 | T v1 = input[y_low * width + x_low];
|
54 | 51 | T v2 = input[y_low * width + x_high];
|
@@ -171,7 +168,7 @@ __device__ void bilinear_interpolate_gradient(
|
171 | 168 | }
|
172 | 169 |
|
173 | 170 | template <typename T>
|
174 |
| -__global__ void RoIAlignBackwardFeature(const int nthreads, const T* grad_output, |
| 171 | +__global__ void RoIAlignBackward(const int nthreads, const T* grad_output, |
175 | 172 | const int num_rois, const T spatial_scale,
|
176 | 173 | const int channels, const int height, const int width,
|
177 | 174 | const int pooled_height, const int pooled_width,
|
@@ -327,7 +324,7 @@ at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad,
|
327 | 324 | int w_stride = grad.stride(3);
|
328 | 325 |
|
329 | 326 | AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIAlign_backward", [&] {
|
330 |
| - RoIAlignBackwardFeature<scalar_t><<<grid, block, 0, stream>>>( |
| 327 | + RoIAlignBackward<scalar_t><<<grid, block, 0, stream>>>( |
331 | 328 | grad.numel(),
|
332 | 329 | grad.data<scalar_t>(),
|
333 | 330 | num_rois,
|
|
0 commit comments