Pass backbone instances instead of backbone names to the segmentation model builders.

datumbox · datumbox · commit de1d2ad3b3f0 · 2021-10-18T20:00:36.000+01:00
diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py
@@ -1,4 +1,4 @@
-from typing import Any, List
+from typing import List
 
 import torch
 from torch import nn
@@ -114,48 +114,25 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 
 def _deeplabv3_resnet(
-    backbone_name: str,
-    pretrained: bool,
-    progress: bool,
+    backbone: resnet.ResNet,
     num_classes: int,
     aux: bool,
-    pretrained_backbone: bool = True,
 ) -> DeepLabV3:
-    if pretrained:
-        aux = True
-        pretrained_backbone = False
-
-    backbone = resnet.__dict__[backbone_name](
-        pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True]
-    )
     return_layers = {"layer4": "out"}
     if aux:
         return_layers["layer3"] = "aux"
     backbone = create_feature_extractor(backbone, return_layers)
 
     aux_classifier = FCNHead(1024, num_classes) if aux else None
     classifier = DeepLabHead(2048, num_classes)
-    model = DeepLabV3(backbone, classifier, aux_classifier)
-
-    if pretrained:
-        arch = "deeplabv3_" + backbone_name + "_coco"
-        _load_weights(arch, model, model_urls.get(arch, None), progress)
-    return model
+    return DeepLabV3(backbone, classifier, aux_classifier)
 
 
 def _deeplabv3_mobilenetv3(
-    backbone_name: str,
-    pretrained: bool,
-    progress: bool,
+    backbone: mobilenetv3.MobileNetV3,
     num_classes: int,
     aux: bool,
-    pretrained_backbone: bool = True,
 ) -> DeepLabV3:
-    if pretrained:
-        aux = True
-        pretrained_backbone = False
-
-    backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features
     # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks.
     # The first and last blocks are always included because they are the C0 (conv1) and Cn.
     stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1]
@@ -170,20 +147,15 @@ def _deeplabv3_mobilenetv3(
 
     aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None
     classifier = DeepLabHead(out_inplanes, num_classes)
-    model = DeepLabV3(backbone, classifier, aux_classifier)
-
-    if pretrained:
-        arch = "deeplabv3_" + backbone_name + "_coco"
-        _load_weights(arch, model, model_urls.get(arch, None), progress)
-    return model
+    return DeepLabV3(backbone, classifier, aux_classifier)
 
 
 def deeplabv3_resnet50(
     pretrained: bool = False,
     progress: bool = True,
     num_classes: int = 21,
     aux_loss: bool = False,
-    **kwargs: Any,
+    pretrained_backbone: bool = True,
 ) -> DeepLabV3:
     """Constructs a DeepLabV3 model with a ResNet-50 backbone.
 
@@ -193,16 +165,27 @@ def deeplabv3_resnet50(
         progress (bool): If True, displays a progress bar of the download to stderr
         num_classes (int): number of output classes of the model (including the background)
         aux_loss (bool): If True, it uses an auxiliary loss
+        pretrained_backbone (bool): If True, the backbone will be pre-trained.
     """
-    return _deeplabv3_resnet("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs)
+    if pretrained:
+        aux_loss = True
+        pretrained_backbone = False
+
+    backbone = resnet.resnet50(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True])
+    model = _deeplabv3_resnet(backbone, num_classes, aux_loss)
+
+    if pretrained:
+        arch = "deeplabv3_resnet50_coco"
+        _load_weights(arch, model, model_urls.get(arch, None), progress)
+    return model
 
 
 def deeplabv3_resnet101(
     pretrained: bool = False,
     progress: bool = True,
     num_classes: int = 21,
     aux_loss: bool = False,
-    **kwargs: Any,
+    pretrained_backbone: bool = True,
 ) -> DeepLabV3:
     """Constructs a DeepLabV3 model with a ResNet-101 backbone.
 
@@ -212,16 +195,27 @@ def deeplabv3_resnet101(
         progress (bool): If True, displays a progress bar of the download to stderr
         num_classes (int): The number of classes
         aux_loss (bool): If True, include an auxiliary classifier
+        pretrained_backbone (bool): If True, the backbone will be pre-trained.
     """
-    return _deeplabv3_resnet("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs)
+    if pretrained:
+        aux_loss = True
+        pretrained_backbone = False
+
+    backbone = resnet.resnet101(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True])
+    model = _deeplabv3_resnet(backbone, num_classes, aux_loss)
+
+    if pretrained:
+        arch = "deeplabv3_resnet101_coco"
+        _load_weights(arch, model, model_urls.get(arch, None), progress)
+    return model
 
 
 def deeplabv3_mobilenet_v3_large(
     pretrained: bool = False,
     progress: bool = True,
     num_classes: int = 21,
     aux_loss: bool = False,
-    **kwargs: Any,
+    pretrained_backbone: bool = True,
 ) -> DeepLabV3:
     """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone.
 
@@ -231,5 +225,16 @@ def deeplabv3_mobilenet_v3_large(
         progress (bool): If True, displays a progress bar of the download to stderr
         num_classes (int): number of output classes of the model (including the background)
         aux_loss (bool): If True, it uses an auxiliary loss
+        pretrained_backbone (bool): If True, the backbone will be pre-trained.
     """
-    return _deeplabv3_mobilenetv3("mobilenet_v3_large", pretrained, progress, num_classes, aux_loss, **kwargs)
+    if pretrained:
+        aux_loss = True
+        pretrained_backbone = False
+
+    backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True).features
+    model = _deeplabv3_mobilenetv3(backbone, num_classes, aux_loss)
+
+    if pretrained:
+        arch = "deeplabv3_mobilenet_v3_large_coco"
+        _load_weights(arch, model, model_urls.get(arch, None), progress)
+    return model
diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py
@@ -1,5 +1,3 @@
-from typing import Any
-
 from torch import nn
 
 from .. import resnet
@@ -48,41 +46,26 @@ def __init__(self, in_channels: int, channels: int) -> None:
 
 
 def _fcn_resnet(
-    backbone_name: str,
-    pretrained: bool,
-    progress: bool,
+    backbone: resnet.ResNet,
     num_classes: int,
     aux: bool,
-    pretrained_backbone: bool = True,
 ) -> FCN:
-    if pretrained:
-        aux = True
-        pretrained_backbone = False
-
-    backbone = resnet.__dict__[backbone_name](
-        pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True]
-    )
     return_layers = {"layer4": "out"}
     if aux:
         return_layers["layer3"] = "aux"
     backbone = create_feature_extractor(backbone, return_layers)
 
     aux_classifier = FCNHead(1024, num_classes) if aux else None
     classifier = FCNHead(2048, num_classes)
-    model = FCN(backbone, classifier, aux_classifier)
-
-    if pretrained:
-        arch = "fcn_" + backbone_name + "_coco"
-        _load_weights(arch, model, model_urls.get(arch, None), progress)
-    return model
+    return FCN(backbone, classifier, aux_classifier)
 
 
 def fcn_resnet50(
     pretrained: bool = False,
     progress: bool = True,
     num_classes: int = 21,
     aux_loss: bool = False,
-    **kwargs: Any,
+    pretrained_backbone: bool = True,
 ) -> FCN:
     """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone.
 
@@ -92,16 +75,27 @@ def fcn_resnet50(
         progress (bool): If True, displays a progress bar of the download to stderr
         num_classes (int): number of output classes of the model (including the background)
         aux_loss (bool): If True, it uses an auxiliary loss
+        pretrained_backbone (bool): If True, the backbone will be pre-trained.
     """
-    return _fcn_resnet("resnet50", pretrained, progress, num_classes, aux_loss, **kwargs)
+    if pretrained:
+        aux_loss = True
+        pretrained_backbone = False
+
+    backbone = resnet.resnet50(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True])
+    model = _fcn_resnet(backbone, num_classes, aux_loss)
+
+    if pretrained:
+        arch = "fcn_resnet50_coco"
+        _load_weights(arch, model, model_urls.get(arch, None), progress)
+    return model
 
 
 def fcn_resnet101(
     pretrained: bool = False,
     progress: bool = True,
     num_classes: int = 21,
     aux_loss: bool = False,
-    **kwargs: Any,
+    pretrained_backbone: bool = True,
 ) -> FCN:
     """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone.
 
@@ -111,5 +105,16 @@ def fcn_resnet101(
         progress (bool): If True, displays a progress bar of the download to stderr
         num_classes (int): number of output classes of the model (including the background)
         aux_loss (bool): If True, it uses an auxiliary loss
+        pretrained_backbone (bool): If True, the backbone will be pre-trained.
     """
-    return _fcn_resnet("resnet101", pretrained, progress, num_classes, aux_loss, **kwargs)
+    if pretrained:
+        aux_loss = True
+        pretrained_backbone = False
+
+    backbone = resnet.resnet101(pretrained=pretrained_backbone, replace_stride_with_dilation=[False, True, True])
+    model = _fcn_resnet(backbone, num_classes, aux_loss)
+
+    if pretrained:
+        arch = "fcn_resnet101_coco"
+        _load_weights(arch, model, model_urls.get(arch, None), progress)
+    return model
diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py
@@ -1,5 +1,5 @@
 from collections import OrderedDict
-from typing import Any, Dict
+from typing import Dict
 
 from torch import nn, Tensor
 from torch.nn import functional as F
@@ -79,13 +79,7 @@ def forward(self, input: Dict[str, Tensor]) -> Tensor:
         return self.low_classifier(low) + self.high_classifier(x)
 
 
-def _lraspp_mobilenetv3(
-    backbone_name: str, pretrained: bool, progress: bool, num_classes: int, pretrained_backbone: bool = True
-) -> LRASPP:
-    if pretrained:
-        pretrained_backbone = False
-
-    backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features
+def _lraspp_mobilenetv3(backbone: mobilenetv3.MobileNetV3, num_classes: int) -> LRASPP:
     # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks.
     # The first and last blocks are always included because they are the C0 (conv1) and Cn.
     stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1]
@@ -95,16 +89,11 @@ def _lraspp_mobilenetv3(
     high_channels = backbone[high_pos].out_channels
     backbone = create_feature_extractor(backbone, {str(low_pos): "low", str(high_pos): "high"})
 
-    model = LRASPP(backbone, low_channels, high_channels, num_classes)
-
-    if pretrained:
-        arch = "lraspp_" + backbone_name + "_coco"
-        _load_weights(arch, model, model_urls.get(arch, None), progress)
-    return model
+    return LRASPP(backbone, low_channels, high_channels, num_classes)
 
 
 def lraspp_mobilenet_v3_large(
-    pretrained: bool = False, progress: bool = True, num_classes: int = 21, **kwargs: Any
+    pretrained: bool = False, progress: bool = True, num_classes: int = 21, pretrained_backbone: bool = True
 ) -> LRASPP:
     """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone.
 
@@ -113,8 +102,15 @@ def lraspp_mobilenet_v3_large(
             contains the same classes as Pascal VOC
         progress (bool): If True, displays a progress bar of the download to stderr
         num_classes (int): number of output classes of the model (including the background)
+        pretrained_backbone (bool): If True, the backbone will be pre-trained.
     """
-    if kwargs.pop("aux_loss", False):
-        raise NotImplementedError("This model does not use auxiliary loss")
+    if pretrained:
+        pretrained_backbone = False
+
+    backbone = mobilenetv3.mobilenet_v3_large(pretrained=pretrained_backbone, dilated=True).features
+    model = _lraspp_mobilenetv3(backbone, num_classes)
 
-    return _lraspp_mobilenetv3("mobilenet_v3_large", pretrained, progress, num_classes, **kwargs)
+    if pretrained:
+        arch = "lraspp_mobilenet_v3_large_coco"
+        _load_weights(arch, model, model_urls.get(arch, None), progress)
+    return model