Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/ops.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ torchvision.ops
.. autofunction:: batched_nms
.. autofunction:: remove_small_boxes
.. autofunction:: clip_boxes_to_image
.. autofunction:: box_convert
.. autofunction:: box_area
.. autofunction:: box_iou
.. autofunction:: generalized_box_iou
Expand Down
96 changes: 96 additions & 0 deletions test/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,102 @@ def test_convert_boxes_to_roi_format(self):
self.assertTrue(torch.equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence)))


class BoxTester(unittest.TestCase):
    """Tests for ``ops.box_convert`` across the xyxy, xywh and cxcywh formats."""

    def _check_boxes(self, actual, expected):
        """Assert that ``actual`` matches ``expected`` in shape, dtype and values.

        The checks are made on the tensor returned by ``ops.box_convert``
        (``actual``), not on the hand-written expectation.
        """
        self.assertEqual(actual.size(), expected.size())
        self.assertEqual(actual.dtype, expected.dtype)
        self.assertTrue(torch.equal(actual, expected))

    def test_bbox_same(self):
        # Converting between identical formats must return the values
        # unchanged, whichever format label is used.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                 [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        for fmt in ("xyxy", "xywh", "cxcywh"):
            # subTest reports which format failed without stopping the loop.
            with self.subTest(fmt=fmt):
                box_same = ops.box_convert(box_tensor, in_fmt=fmt, out_fmt=fmt)
                self._check_boxes(box_same, exp_xyxy)

    def test_bbox_xyxy_xywh(self):
        # Convert boxes from xyxy to xywh and back; the round trip must
        # reproduce the input exactly.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                 [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        self._check_boxes(box_xywh, exp_xywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        self._check_boxes(box_xyxy, box_tensor)

    def test_bbox_xyxy_cxcywh(self):
        # Convert boxes from xyxy to cxcywh and back; the round trip must
        # reproduce the input exactly.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                   [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        self._check_boxes(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        self._check_boxes(box_xyxy, box_tensor)

    def test_bbox_xywh_cxcywh(self):
        # Convert boxes from xywh to cxcywh and back; the round trip must
        # reproduce the input exactly.
        # box_tensor is in x y w h format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        # Centre = top-left corner + half the size, e.g. (10 + 20 / 2, 15 + 20 / 2).
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                   [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        self._check_boxes(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        self._check_boxes(box_xywh, box_tensor)

# def test_bbox_convert_jit(self):
# box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
# [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
Comment on lines +730 to +732
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two options here:

  • we merge the PR now and try to fix torchscript later
  • we fix torchscript right now.

Do you remember what type of errors you were facing? I'm fine with both approaches, so that we can move forward with this PR (but we should fix torchscript soon if we merge this without torchscript support)


# scripted_fn = torch.jit.script(ops.box_convert)
# TOLERANCE = 1e-3

# box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
# scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
# self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE)

# box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
# scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
# self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE)


class BoxAreaTester(unittest.TestCase):
def test_box_area(self):
# A bounding box of area 10000 and a degenerate case
Expand Down
4 changes: 3 additions & 1 deletion torchvision/ops/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .boxes import nms, batched_nms, remove_small_boxes, clip_boxes_to_image, box_area, box_iou, generalized_box_iou
from .boxes import box_convert
from .new_empty_tensor import _new_empty_tensor
from .deform_conv import deform_conv2d, DeformConv2d
from .roi_align import roi_align, RoIAlign
Expand All @@ -15,7 +16,8 @@

__all__ = [
'deform_conv2d', 'DeformConv2d', 'nms', 'batched_nms', 'remove_small_boxes',
'clip_boxes_to_image', 'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool',
'clip_boxes_to_image', 'box_convert',
'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool',
'RoIPool', '_new_empty_tensor', 'ps_roi_align', 'PSRoIAlign', 'ps_roi_pool',
'PSRoIPool', 'MultiScaleRoIAlign', 'FeaturePyramidNetwork'
]
83 changes: 83 additions & 0 deletions torchvision/ops/_box_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import torch
from torch.jit.annotations import Tuple
from torch import Tensor
import torchvision


def _box_cxcywh_to_xyxy(boxes: Tensor) -> Tensor:
"""
Converts bounding boxes from (cx, cy, w, h) format to (x1, y1, x2, y2) format.
(cx, cy) refers to center of bounding box
(w, h) are width and height of bounding box
Arguments:
boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format which will be converted.

Returns:
boxes (Tensor(N, 4)): boxes in (x1, y1, x2, y2) format.
"""
# We need to change all 4 of them so some temporary variable is needed.
cx, cy, w, h = boxes.unbind(-1)
x1 = cx - 0.5 * w
y1 = cy - 0.5 * h
x2 = cx + 0.5 * w
y2 = cy + 0.5 * h

boxes = torch.stack((x1, y1, x2, y2), dim=-1)

return boxes


def _box_xyxy_to_cxcywh(boxes: Tensor) -> Tensor:
"""
Converts bounding boxes from (x1, y1, x2, y2) format to (cx, cy, w, h) format.
(x1, y1) refer to top left of bounding box
(x2, y2) refer to bottom right of bounding box
Arguments:
boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format which will be converted.

Returns:
boxes (Tensor(N, 4)): boxes in (cx, cy, w, h) format.
"""
x1, y1, x2, y2 = boxes.unbind(-1)
cx = (x1 + x2) / 2
cy = (y1 + y2) / 2
w = x2 - x1
h = y2 - y1

boxes = torch.stack((cx, cy, w, h), dim=-1)

return boxes


def _box_xywh_to_xyxy(boxes: Tensor) -> Tensor:
"""
Converts bounding boxes from (x, y, w, h) format to (x1, y1, x2, y2) format.
(x, y) refers to top left of bouding box.
(w, h) refers to width and height of box.
Arguments:
boxes (Tensor[N, 4]): boxes in (x, y, w, h) which will be converted.

Returns:
boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format.
"""
x, y, w, h = boxes.unbind(-1)
boxes = torch.stack([x, y, x + w, y + h], dim=-1)
return boxes


def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor:
"""
Converts bounding boxes from (x1, y1, x2, y2) format to (x, y, w, h) format.
(x1, y1) refer to top left of bounding box
(x2, y2) refer to bottom right of bounding box
Arguments:
boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) which will be converted.

Returns:
boxes (Tensor[N, 4]): boxes in (x, y, w, h) format.
"""
x1, y1, x2, y2 = boxes.unbind(-1)
x2 = x2 - x1 # x2 - x1
y2 = y2 - y1 # y2 - y1
boxes = torch.stack((x1, y1, x2, y2), dim=-1)
return boxes
56 changes: 56 additions & 0 deletions torchvision/ops/boxes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import torch
from torch.jit.annotations import Tuple
from torch import Tensor
from ._box_convert import _box_cxcywh_to_xyxy, _box_xyxy_to_cxcywh, _box_xywh_to_xyxy, _box_xyxy_to_xywh
import torchvision


Expand Down Expand Up @@ -133,6 +134,61 @@ def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor:
return clipped_boxes.reshape(boxes.shape)


def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
    """
    Converts boxes from given in_fmt to out_fmt.
    Supported in_fmt and out_fmt are:

    'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right.

    'xywh' : boxes are represented via corner, width and height, x1, y1 being top left, w, h being width and height.

    'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h
    being width and height.

    Arguments:
        boxes (Tensor[N, 4]): boxes which will be converted.
        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'].
        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']

    Returns:
        boxes (Tensor[N, 4]): Boxes into converted format. This is always a new tensor;
        the input is not modified.

    Raises:
        AssertionError: if in_fmt or out_fmt is not one of the supported formats.
    """
    allowed_fmts = ("xyxy", "xywh", "cxcywh")
    # Raise AssertionError explicitly (the type an `assert` would produce) so
    # that the validation is not stripped when Python runs with -O.
    if in_fmt not in allowed_fmts:
        raise AssertionError("Unsupported in_fmt, expected one of 'xyxy', 'xywh', 'cxcywh'")
    if out_fmt not in allowed_fmts:
        raise AssertionError("Unsupported out_fmt, expected one of 'xyxy', 'xywh', 'cxcywh'")

    if in_fmt == out_fmt:
        # Nothing to convert; hand back a copy so callers never alias the input.
        return boxes.clone()

    # Step 1: bring the input to xyxy, which acts as the pivot format.
    # (No-op when the input is already xyxy.)
    if in_fmt == "xywh":
        boxes = _box_xywh_to_xyxy(boxes)
    elif in_fmt == "cxcywh":
        boxes = _box_cxcywh_to_xyxy(boxes)

    # Step 2: go from xyxy to the requested output format.
    # (No-op when xyxy is the requested output.)
    if out_fmt == "xywh":
        boxes = _box_xyxy_to_xywh(boxes)
    elif out_fmt == "cxcywh":
        boxes = _box_xyxy_to_cxcywh(boxes)

    # in_fmt != out_fmt here, so at least one of the two steps produced a new tensor.
    return boxes


def box_area(boxes: Tensor) -> Tensor:
"""
Computes the area of a set of bounding boxes, which are specified by its
Expand Down