10
10
11
11
12
12
template <typename T>
13
- __global__ void RoIPoolForward (const int nthreads, const T* bottom_data ,
13
+ __global__ void RoIPoolForward (const int nthreads, const T* input ,
14
14
const T spatial_scale, const int channels, const int height,
15
15
const int width, const int pooled_height, const int pooled_width,
16
- const T* bottom_rois , T* top_data , int * argmax_data) {
16
+ const T* rois , T* output , int * argmax_data) {
17
17
CUDA_1D_KERNEL_LOOP (index , nthreads) {
18
18
// (n, c, ph, pw) is an element in the pooled output
19
19
int pw = index % pooled_width;
20
20
int ph = (index / pooled_width) % pooled_height;
21
21
int c = (index / pooled_width / pooled_height) % channels;
22
22
int n = index / pooled_width / pooled_height / channels;
23
23
24
- const T* offset_bottom_rois = bottom_rois + n * 5 ;
25
- int roi_batch_ind = offset_bottom_rois [0 ];
26
- int roi_start_w = round (offset_bottom_rois [1 ] * spatial_scale);
27
- int roi_start_h = round (offset_bottom_rois [2 ] * spatial_scale);
28
- int roi_end_w = round (offset_bottom_rois [3 ] * spatial_scale);
29
- int roi_end_h = round (offset_bottom_rois [4 ] * spatial_scale);
24
+ const T* offset_rois = rois + n * 5 ;
25
+ int roi_batch_ind = offset_rois [0 ];
26
+ int roi_start_w = round (offset_rois [1 ] * spatial_scale);
27
+ int roi_start_h = round (offset_rois [2 ] * spatial_scale);
28
+ int roi_end_w = round (offset_rois [3 ] * spatial_scale);
29
+ int roi_end_h = round (offset_rois [4 ] * spatial_scale);
30
30
31
- // Force malformed ROIs to be 1x1 or HxW
31
+ // Force malformed ROIs to be 1x1
32
32
int roi_width = max (roi_end_w - roi_start_w + 1 , 1 );
33
33
int roi_height = max (roi_end_h - roi_start_h + 1 , 1 );
34
34
T bin_size_h = static_cast <T>(roi_height)
@@ -56,28 +56,28 @@ __global__ void RoIPoolForward(const int nthreads, const T* bottom_data,
56
56
T maxval = is_empty ? 0 : -FLT_MAX;
57
57
// If nothing is pooled, argmax = -1 causes nothing to be backprop'd
58
58
int maxidx = -1 ;
59
- const T* offset_bottom_data =
60
- bottom_data + (roi_batch_ind * channels + c) * height * width;
59
+ const T* offset_input =
60
+ input + (roi_batch_ind * channels + c) * height * width;
61
61
for (int h = hstart; h < hend; ++h) {
62
62
for (int w = wstart; w < wend; ++w) {
63
- int bottom_index = h * width + w;
64
- if (offset_bottom_data[bottom_index ] > maxval) {
65
- maxval = offset_bottom_data[bottom_index ];
66
- maxidx = bottom_index ;
63
+ int input_index = h * width + w;
64
+ if (offset_input[input_index ] > maxval) {
65
+ maxval = offset_input[input_index ];
66
+ maxidx = input_index ;
67
67
}
68
68
}
69
69
}
70
- top_data [index ] = maxval;
70
+ output [index ] = maxval;
71
71
argmax_data[index ] = maxidx;
72
72
}
73
73
}
74
74
75
75
template <typename T>
76
- __global__ void RoIPoolBackward (const int nthreads, const T* top_grad ,
76
+ __global__ void RoIPoolBackward (const int nthreads, const T* grad_output ,
77
77
const int * argmax_data, const int num_rois, const T spatial_scale,
78
78
const int channels, const int height, const int width,
79
- const int pooled_height, const int pooled_width, T* bottom_data ,
80
- const T* bottom_rois ,
79
+ const int pooled_height, const int pooled_width, T* grad_input ,
80
+ const T* rois ,
81
81
const int n_stride, const int c_stride,
82
82
const int h_stride, const int w_stride) {
83
83
@@ -88,18 +88,17 @@ __global__ void RoIPoolBackward(const int nthreads, const T* top_grad,
88
88
int c = (index / pooled_width / pooled_height) % channels;
89
89
int n = index / pooled_width / pooled_height / channels;
90
90
91
- const T* offset_bottom_rois = bottom_rois + n * 5 ;
92
- int roi_batch_ind = offset_bottom_rois[0 ];
93
- int bottom_offset = (roi_batch_ind * channels + c) * height * width;
94
- T* bottom_data_offset = bottom_data + bottom_offset;
91
+ const T* offset_rois = rois + n * 5 ;
92
+ int roi_batch_ind = offset_rois[0 ];
93
+ T* grad_input_offset = grad_input + ((roi_batch_ind * channels + c) * height * width);
95
94
96
- int top_offset = n*n_stride + c*c_stride;
95
+ int output_offset = n*n_stride + c*c_stride;
97
96
const int * argmax_data_offset = argmax_data + n*channels*pooled_height*pooled_width;
98
97
int argmax = argmax_data_offset[c*pooled_height*pooled_width + ph*pooled_width + pw];
99
98
100
99
if (argmax != -1 ) {
101
- atomicAdd (bottom_data_offset + argmax,
102
- static_cast <T>(top_grad[top_offset + ph*h_stride + pw*w_stride]));
100
+ atomicAdd (grad_input_offset + argmax,
101
+ static_cast <T>(grad_output[output_offset + ph*h_stride + pw*w_stride]));
103
102
}
104
103
}
105
104
}
0 commit comments