@@ -410,7 +410,7 @@ def postprocess_detections(self, head_outputs: Dict[str, Tensor], image_anchors:
410
410
411
411
412
412
class SSDFeatureExtractorVGG (nn .Module ):
413
- def __init__ (self , backbone : nn .Module , highres : bool , rescaling : bool ):
413
+ def __init__ (self , backbone : nn .Module , highres : bool ):
414
414
super ().__init__ ()
415
415
416
416
_ , _ , maxpool3_pos , maxpool4_pos , _ = (i for i , layer in enumerate (backbone ) if isinstance (layer , nn .MaxPool2d ))
@@ -476,13 +476,8 @@ def __init__(self, backbone: nn.Module, highres: bool, rescaling: bool):
476
476
fc ,
477
477
))
478
478
self .extra = extra
479
- self .rescaling = rescaling
480
479
481
480
def forward (self , x : Tensor ) -> Dict [str , Tensor ]:
482
- # Undo the 0-1 scaling of toTensor. Necessary for some backbones.
483
- if self .rescaling :
484
- x *= 255
485
-
486
481
# L2 normalization + rescaling of the 1st block's feature map
487
482
x = self .features (x )
488
483
rescaled = self .scale_weight .view (1 , - 1 , 1 , 1 ) * F .normalize (x )
@@ -496,8 +491,7 @@ def forward(self, x: Tensor) -> Dict[str, Tensor]:
496
491
return OrderedDict ([(str (i ), v ) for i , v in enumerate (output )])
497
492
498
493
499
- def _vgg_extractor (backbone_name : str , highres : bool , progress : bool , pretrained : bool , trainable_layers : int ,
500
- rescaling : bool ):
494
+ def _vgg_extractor (backbone_name : str , highres : bool , progress : bool , pretrained : bool , trainable_layers : int ):
501
495
if backbone_name in backbone_urls :
502
496
# Use custom backbones more appropriate for SSD
503
497
arch = backbone_name .split ('_' )[0 ]
@@ -521,19 +515,19 @@ def _vgg_extractor(backbone_name: str, highres: bool, progress: bool, pretrained
521
515
for parameter in b .parameters ():
522
516
parameter .requires_grad_ (False )
523
517
524
- return SSDFeatureExtractorVGG (backbone , highres , rescaling )
518
+ return SSDFeatureExtractorVGG (backbone , highres )
525
519
526
520
527
521
def ssd300_vgg16 (pretrained : bool = False , progress : bool = True , num_classes : int = 91 ,
528
522
pretrained_backbone : bool = True , trainable_backbone_layers : Optional [int ] = None , ** kwargs : Any ):
529
523
"""
530
- Constructs an SSD model with a VGG16 backbone. See `SSD` for more details.
524
+ Constructs an SSD model with input size 300x300 and a VGG16 backbone. See `SSD` for more details.
531
525
532
526
Example:
533
527
534
528
>>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True)
535
529
>>> model.eval()
536
- >>> x = [torch.rand(3, 300, 400 ), torch.rand(3, 500, 400)]
530
+ >>> x = [torch.rand(3, 300, 300 ), torch.rand(3, 500, 400)]
537
531
>>> predictions = model(x)
538
532
539
533
Args:
@@ -544,19 +538,28 @@ def ssd300_vgg16(pretrained: bool = False, progress: bool = True, num_classes: i
544
538
trainable_backbone_layers (int): number of trainable (not frozen) backbone layers starting from final block.
545
539
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
546
540
"""
541
+ if "size" in kwargs :
542
+ warnings .warn ("The size of the model is already fixed; ignoring the argument." )
543
+
547
544
trainable_backbone_layers = _validate_trainable_layers (
548
545
pretrained or pretrained_backbone , trainable_backbone_layers , 5 , 5 )
549
546
550
547
if pretrained :
551
548
# no need to download the backbone if pretrained is set
552
549
pretrained_backbone = False
553
550
554
- backbone = _vgg_extractor ("vgg16_features" , False , progress , pretrained_backbone , trainable_backbone_layers , True )
551
+ backbone = _vgg_extractor ("vgg16_features" , False , progress , pretrained_backbone , trainable_backbone_layers )
555
552
anchor_generator = DefaultBoxGenerator ([[2 ], [2 , 3 ], [2 , 3 ], [2 , 3 ], [2 ], [2 ]],
556
553
scales = [0.07 , 0.15 , 0.33 , 0.51 , 0.69 , 0.87 , 1.05 ],
557
554
steps = [8 , 16 , 32 , 64 , 100 , 300 ])
558
- model = SSD (backbone , anchor_generator , (300 , 300 ), num_classes ,
559
- image_mean = [0.48235 , 0.45882 , 0.40784 ], image_std = [1. , 1. , 1. ], ** kwargs )
555
+
556
+ defaults = {
557
+ # Rescale the input in a way compatible with the backbone
558
+ "image_mean" : [0.48235 , 0.45882 , 0.40784 ],
559
+ "image_std" : [1.0 / 255.0 , 1.0 / 255.0 , 1.0 / 255.0 ], # undo the 0-1 scaling of toTensor
560
+ }
561
+ kwargs = {** defaults , ** kwargs }
562
+ model = SSD (backbone , anchor_generator , (300 , 300 ), num_classes , ** kwargs )
560
563
if pretrained :
561
564
weights_name = 'ssd300_vgg16_coco'
562
565
if model_urls .get (weights_name , None ) is None :
0 commit comments