Fix argument order of deform_conv2d

Licht-T · Licht-T · commit 2c60f5cc27c9 · 2020-10-19T01:04:57.000+09:00
diff --git a/test/test_ops.py b/test/test_ops.py
@@ -567,35 +567,31 @@ def _test_backward_with_batchsize(self, device, contiguous, batch_sz):
         x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz)
 
         def func(x_, offset_, mask_, weight_, bias_):
-            return ops.deform_conv2d(x_, offset_, mask_,
-                                     weight_, bias_, stride=stride,
-                                     padding=padding, dilation=dilation)
+            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride,
+                                     padding=padding, dilation=dilation, mask=mask_)
 
         gradcheck(func, (x, offset, mask, weight, bias), nondet_tol=1e-5)
 
         def func_no_mask(x_, offset_, weight_, bias_):
-            return ops.deform_conv2d(x_, offset_, None,
-                                     weight_, bias_, stride=stride,
-                                     padding=padding, dilation=dilation)
+            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride,
+                                     padding=padding, dilation=dilation, mask=None)
 
         gradcheck(func_no_mask, (x, offset, weight, bias), nondet_tol=1e-5)
 
         @torch.jit.script
         def script_func(x_, offset_, mask_, weight_, bias_, stride_, pad_, dilation_):
             # type:(Tensor, Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
-            return ops.deform_conv2d(x_, offset_, mask_,
-                                     weight_, bias_, stride=stride_,
-                                     padding=pad_, dilation=dilation_)
+            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride_,
+                                     padding=pad_, dilation=dilation_, mask=mask_)
 
         gradcheck(lambda z, off, msk, wei, bi: script_func(z, off, msk, wei, bi, stride, padding, dilation),
                   (x, offset, mask, weight, bias), nondet_tol=1e-5)
 
         @torch.jit.script
         def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
             # type:(Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
-            return ops.deform_conv2d(x_, offset_, None,
-                                     weight_, bias_, stride=stride_,
-                                     padding=pad_, dilation=dilation_)
+            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride_,
+                                     padding=pad_, dilation=dilation_, mask=None)
 
         gradcheck(lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation),
                   (x, offset, weight, bias), nondet_tol=1e-5)
@@ -621,7 +617,7 @@ def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
 
             for d in ["cpu", "cuda"]:
 
-                out = ops.deform_conv2d(img.to(d), offset.to(d), mask.to(d), weight.to(d), padding=1)
+                out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d))
                 out.mean().backward()
                 if true_cpu_grads is None:
                     true_cpu_grads = init_weight.grad
diff --git a/torchvision/ops/deform_conv.py b/torchvision/ops/deform_conv.py
@@ -12,12 +12,12 @@
 def deform_conv2d(
     input: Tensor,
     offset: Tensor,
-    mask: Optional[Tensor],
     weight: Tensor,
     bias: Optional[Tensor] = None,
     stride: Tuple[int, int] = (1, 1),
     padding: Tuple[int, int] = (0, 0),
     dilation: Tuple[int, int] = (1, 1),
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """
     Performs Deformable Convolution, described in Deformable Convolutional Networks
@@ -27,16 +27,16 @@ def deform_conv2d(
         offset (Tensor[batch_size, 2 * offset_groups * kernel_height * kernel_width,
             out_height, out_width]): offsets to be applied for each position in the
             convolution kernel.
-        mask (Tensor[batch_size, offset_groups * kernel_height * kernel_width,
-            out_height, out_width]): masks to be applied for each position in the
-            convolution kernel.
         weight (Tensor[out_channels, in_channels // groups, kernel_height, kernel_width]):
             convolution weights, split into groups of size (in_channels // groups)
         bias (Tensor[out_channels]): optional bias of shape (out_channels,). Default: None
         stride (int or Tuple[int, int]): distance between convolution centers. Default: 1
         padding (int or Tuple[int, int]): height/width of padding of zeroes around
             each image. Default: 0
         dilation (int or Tuple[int, int]): the spacing between kernel elements. Default: 1
+        mask (Tensor[batch_size, offset_groups * kernel_height * kernel_width,
+            out_height, out_width]): masks to be applied for each position in the
+            convolution kernel. Default: None
 
     Returns:
         output (Tensor[batch_sz, out_channels, out_h, out_w]): result of convolution
@@ -51,7 +51,7 @@ def deform_conv2d(
         >>> # and kernel size of 3, without padding, the output size is 8
         >>> offset = torch.rand(4, 2 * kh * kw, 8, 8)
         >>> mask = torch.rand(4, kh * kw, 8, 8)
-        >>> out = deform_conv2d(input, offset, mask, weight)
+        >>> out = deform_conv2d(input, offset, weight, mask=mask)
         >>> print(out.shape)
         >>> # returns
         >>>  torch.Size([4, 5, 8, 8])
@@ -158,8 +158,8 @@ def forward(self, input: Tensor, offset: Tensor, mask: Tensor = None) -> Tensor:
                 out_height, out_width]): masks to be applied for each position in the
                 convolution kernel.
         """
-        return deform_conv2d(input, offset, mask, self.weight, self.bias, stride=self.stride,
-                             padding=self.padding, dilation=self.dilation)
+        return deform_conv2d(input, offset, self.weight, self.bias, stride=self.stride,
+                             padding=self.padding, dilation=self.dilation, mask=mask)
 
     def __repr__(self) -> str:
         s = self.__class__.__name__ + '('