@@ -199,11 +199,11 @@ bool ConvOp<T, Context>::RunOnDeviceWithOrderNHWC() {
199
199
const int M = filter.dim32 (0 );
200
200
CAFFE_ENFORCE_EQ (filter.dim32 (1 ), kernel_h ());
201
201
CAFFE_ENFORCE_EQ (filter.dim32 (2 ), kernel_w ());
202
- CAFFE_ENFORCE_EQ (filter.dim32 (3 ), C / group_ );
202
+ CAFFE_ENFORCE_EQ (filter.dim32 (3 ), C);
203
203
204
204
ConvPoolOpBase<Context>::SetOutputSize (X, Y, filter.dim32 (0 ));
205
205
// The dimension of each kernel
206
- const int kernel_dim = kernel_h () * kernel_w () * (C / group_) ;
206
+ const int kernel_dim = kernel_h () * kernel_w () * C ;
207
207
// The offset corresponding to a single input image, and a single output
208
208
// image.
209
209
const int input_offset = H * W * C;
@@ -224,7 +224,7 @@ bool ConvOp<T, Context>::RunOnDeviceWithOrderNHWC() {
224
224
}
225
225
// Specialized path for 1 by 1 convolution with stride 1, pad 0 - we
226
226
// can skip im2col.
227
- if (kernel_dim == (C / group_) && !HasPad () && !HasStride ()) {
227
+ if (kernel_dim == C && !HasPad () && !HasStride ()) {
228
228
const int HxW = X.size () / (N * C);
229
229
if (bias_data != nullptr ) {
230
230
ConvPoolOpBase<Context>::template SetBiasMultiplier<T>(
@@ -260,26 +260,20 @@ bool ConvOp<T, Context>::RunOnDeviceWithOrderNHWC() {
260
260
stride_w (),
261
261
X_data,
262
262
col_buffer_data,
263
- &context_,
264
- group_);
263
+ &context_);
265
264
// Weight term
266
- for (int group_id = 0 ; group_id < group_; ++group_id) {
267
- math::GemmEx<T, Context>(
268
- CblasNoTrans,
269
- CblasTrans,
270
- output_image_size,
271
- M / group_,
272
- kernel_dim,
273
- 1 ,
274
- col_buffer_data + group_id * kernel_dim,
275
- group_ * kernel_dim,
276
- filter_data + group_id * (M / group_) * kernel_dim,
277
- kernel_dim,
278
- 0 ,
279
- Y_data + group_id * (M / group_),
280
- M,
281
- &context_);
282
- }
265
+ math::Gemm<T, Context>(
266
+ CblasNoTrans,
267
+ CblasTrans,
268
+ output_image_size,
269
+ M,
270
+ kernel_dim,
271
+ 1 ,
272
+ col_buffer_data,
273
+ filter_data,
274
+ 0 ,
275
+ Y_data,
276
+ &context_);
283
277
if (bias_data != nullptr ) {
284
278
// Bias term
285
279
math::Gemm<T, Context>(
@@ -400,24 +394,19 @@ bool ConvOp<T, Context>::Run1x1ConvOnDeviceWithOrderNHWC(
400
394
const T* bias,
401
395
T* Y) {
402
396
const int G = group_;
403
- const int kernel_dim = C / G;
404
- for (int group_id = 0 ; group_id < group_; ++group_id) {
405
- math::GemmEx<T, Context>(
406
- CblasNoTrans,
407
- CblasTrans,
408
- N * HxW,
409
- M / group_,
410
- kernel_dim,
411
- 1 .0f ,
412
- X + group_id * kernel_dim,
413
- C,
414
- filter + group_id * (M / group_) * kernel_dim,
415
- kernel_dim,
416
- 0 .0f ,
417
- Y + group_id * (M / group_),
418
- M,
419
- &context_);
420
- }
397
+ CAFFE_ENFORCE_EQ (G, 1 );
398
+ math::Gemm<T, Context>(
399
+ CblasNoTrans,
400
+ CblasTrans,
401
+ N * HxW,
402
+ M,
403
+ C,
404
+ 1 .0f ,
405
+ X,
406
+ filter,
407
+ 0 .0f ,
408
+ Y,
409
+ &context_);
421
410
if (bias != nullptr ) {
422
411
const T* bias_multiplier_data = bias_multiplier_.template data <T>();
423
412
math::Gemm<T, Context>(
@@ -657,11 +646,11 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
657
646
const int M = filter.dim32 (0 );
658
647
CAFFE_ENFORCE_EQ (filter.dim32 (1 ), kernel_h ());
659
648
CAFFE_ENFORCE_EQ (filter.dim32 (2 ), kernel_w ());
660
- CAFFE_ENFORCE_EQ (filter.dim32 (3 ), C / group_ );
649
+ CAFFE_ENFORCE_EQ (filter.dim32 (3 ), C);
661
650
dfilter->ResizeLike (filter);
662
651
663
652
// The dimension of each kernel
664
- const int kernel_dim = kernel_h () * kernel_w () * (C / group_) ;
653
+ const int kernel_dim = kernel_h () * kernel_w () * C ;
665
654
// The offset corresponding to a single input image, and a single output
666
655
// image.
667
656
const int input_offset = H * W * C;
@@ -670,7 +659,7 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
670
659
const int output_image_size = dY.dim32 (1 ) * dY.dim32 (2 );
671
660
// The col buffer is stored in CHW order as well - kernel_dim, and the height
672
661
// and width.
673
- col_buffer_.Resize (output_image_size, group_ * kernel_dim);
662
+ col_buffer_.Resize (output_image_size, kernel_dim);
674
663
675
664
const T* Xdata = X.template data <T>();
676
665
const T* const filter_data = filter.template data <T>();
@@ -717,26 +706,20 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
717
706
stride_w (),
718
707
Xdata,
719
708
col_buffer_data,
720
- &context_,
721
- group_);
709
+ &context_);
722
710
// Gradient with respect to filter.
723
- for (int group_id = 0 ; group_id < group_; ++group_id) {
724
- math::GemmEx<T, Context>(
725
- CblasTrans,
726
- CblasNoTrans,
727
- M / group_,
728
- kernel_dim,
729
- output_image_size,
730
- 1 ,
731
- dYdata + output_offset * image_id + group_id * (M / group_),
732
- M,
733
- col_buffer_data + group_id * kernel_dim,
734
- group_ * kernel_dim,
735
- 1 ,
736
- dfilter_data + group_id * (M / group_) * kernel_dim,
737
- kernel_dim,
738
- &context_);
739
- }
711
+ math::Gemm<T, Context>(
712
+ CblasTrans,
713
+ CblasNoTrans,
714
+ M,
715
+ kernel_dim,
716
+ output_image_size,
717
+ 1 ,
718
+ dYdata + output_offset * image_id,
719
+ col_buffer_data,
720
+ 1 ,
721
+ dfilter_data,
722
+ &context_);
740
723
if (!no_bias_) {
741
724
// Gradient with respect to bias
742
725
math::Gemv<T, Context>(
@@ -760,23 +743,18 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
760
743
T* dXdata = dX->template mutable_data <T>();
761
744
for (int image_id = 0 ; image_id < N; ++image_id) {
762
745
// Compute gradient into col_buffer.
763
- for (int group_id = 0 ; group_id < group_; ++group_id) {
764
- math::GemmEx<T, Context>(
765
- CblasNoTrans,
766
- CblasNoTrans,
767
- output_image_size,
768
- kernel_dim,
769
- M / group_,
770
- 1 ,
771
- dYdata + output_offset * image_id + group_id * (M / group_),
772
- M,
773
- filter_data + group_id * (M / group_) * kernel_dim,
774
- kernel_dim,
775
- 0 ,
776
- col_buffer_data + group_id * kernel_dim,
777
- group_ * kernel_dim,
778
- &context_);
779
- }
746
+ math::Gemm<T, Context>(
747
+ CblasNoTrans,
748
+ CblasNoTrans,
749
+ output_image_size,
750
+ kernel_dim,
751
+ M,
752
+ 1 ,
753
+ dYdata + output_offset * image_id,
754
+ filter_data,
755
+ 0 ,
756
+ col_buffer_data,
757
+ &context_);
780
758
math::Col2Im<T, Context, StorageOrder::NHWC>(
781
759
C,
782
760
H,
@@ -793,8 +771,7 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
793
771
stride_w (),
794
772
col_buffer_data,
795
773
dXdata,
796
- &context_,
797
- group_);
774
+ &context_);
798
775
dXdata += input_offset;
799
776
}
800
777
}
0 commit comments