diff --git a/docs/source/conf.py b/docs/source/conf.py
index bedef5a5215..db7b2ef14a2 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -335,8 +335,6 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
 
         for field in obj:
             lines += [f"**{str(field)}**:", ""]
-            if field == obj.DEFAULT:
-                lines += [f"This weight is also available as ``{obj.__name__}.DEFAULT``.", ""]
 
             table = []
 
@@ -349,7 +347,12 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
 
             custom_docs = meta_with_metrics.pop("_docs", None)  # Custom per-Weights docs
             if custom_docs is not None:
-                lines += [custom_docs, ""]
+                lines += [custom_docs]
+
+            if field == obj.DEFAULT:
+                lines += [f"Also available as ``{obj.__name__}.DEFAULT``."]
+
+            lines += [""]
 
             for k, v in meta_with_metrics.items():
                 if k in {"recipe", "license"}:
@@ -367,8 +370,8 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
             lines += textwrap.indent(table, " " * 4).split("\n")
             lines.append("")
             lines.append(
-                f"The preprocessing/inference transforms are available at ``{str(field)}.transforms`` and "
-                f"perform the following operations: {field.transforms().describe()}"
+                f"The inference transforms are available at ``{str(field)}.transforms`` and "
+                f"perform the following preprocessing operations: {field.transforms().describe()}"
             )
             lines.append("")
 
diff --git a/test/test_extended_models.py b/test/test_extended_models.py
index a39ca62ca78..e3f79e28af4 100644
--- a/test/test_extended_models.py
+++ b/test/test_extended_models.py
@@ -96,7 +96,7 @@ def test_schema_meta_validation(model_fn):
     classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
     defaults = {
         "all": {"metrics", "min_size", "num_params", "recipe"},
-        "models": classification_fields,
+        "models": classification_fields | {"_docs"},
         "detection": {"categories", ("metrics", "box_map")},
         "quantization": classification_fields | {"backend", "unquantized"},
         "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},
diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py
index dff0bbad1a4..733928bb5e3 100644
--- a/torchvision/models/alexnet.py
+++ b/torchvision/models/alexnet.py
@@ -65,6 +65,9 @@ class AlexNet_Weights(WeightsEnum):
                 "acc@1": 56.522,
                 "acc@5": 79.066,
             },
+            "_docs": """
+                These weights reproduce closely the results of the paper using a simplified training recipe.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py
index f2c7ee1e03a..f9aa8d3b1c9 100644
--- a/torchvision/models/convnext.py
+++ b/torchvision/models/convnext.py
@@ -207,6 +207,11 @@ def _convnext(
     "min_size": (32, 32),
     "categories": _IMAGENET_CATEGORIES,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#convnext",
+    "_docs": """
+        These weights improve upon the results of the original paper by using a modified version of TorchVision's
+        `new training recipe
+        `_.
+ """, } diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index 47447991f0e..fc354a0c437 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -261,6 +261,7 @@ def _densenet( "min_size": (29, 29), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/pull/116", + "_docs": """These weights are ported from LuaTorch.""", } diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index be166140209..4b911dbfaba 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -431,24 +431,26 @@ def _efficientnet_conf( _COMMON_META: Dict[str, Any] = { "categories": _IMAGENET_CATEGORIES, - "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet", } _COMMON_META_V1 = { **_COMMON_META, "min_size": (1, 1), + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v1", } _COMMON_META_V2 = { **_COMMON_META, "min_size": (33, 33), + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v2", } class EfficientNet_B0_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth", transforms=partial( ImageClassification, crop_size=224, resize_size=256, interpolation=InterpolationMode.BICUBIC @@ -460,6 +462,7 @@ class EfficientNet_B0_Weights(WeightsEnum): "acc@1": 77.692, "acc@5": 93.532, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -467,6 +470,7 @@ class EfficientNet_B0_Weights(WeightsEnum): class EfficientNet_B1_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth", transforms=partial( ImageClassification, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC @@ -478,6 +482,7 @@ class EfficientNet_B1_Weights(WeightsEnum): "acc@1": 78.642, "acc@5": 94.186, }, + "_docs": """These weights are ported from the original paper.""", }, ) IMAGENET1K_V2 = Weights( @@ -493,6 +498,11 @@ class EfficientNet_B1_Weights(WeightsEnum): "acc@1": 79.838, "acc@5": 94.934, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -500,6 +510,7 @@ class EfficientNet_B1_Weights(WeightsEnum): class EfficientNet_B2_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth", transforms=partial( ImageClassification, crop_size=288, resize_size=288, interpolation=InterpolationMode.BICUBIC @@ -511,6 +522,7 @@ class EfficientNet_B2_Weights(WeightsEnum): "acc@1": 80.608, "acc@5": 95.310, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -518,6 +530,7 @@ class EfficientNet_B2_Weights(WeightsEnum): class EfficientNet_B3_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth", transforms=partial( ImageClassification, crop_size=300, resize_size=320, interpolation=InterpolationMode.BICUBIC @@ -529,6 +542,7 @@ class EfficientNet_B3_Weights(WeightsEnum): "acc@1": 82.008, "acc@5": 96.054, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -536,6 +550,7 @@ class EfficientNet_B3_Weights(WeightsEnum): class EfficientNet_B4_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth", transforms=partial( ImageClassification, crop_size=380, resize_size=384, interpolation=InterpolationMode.BICUBIC @@ -547,6 +562,7 @@ class EfficientNet_B4_Weights(WeightsEnum): "acc@1": 83.384, "acc@5": 96.594, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -554,6 +570,7 @@ class EfficientNet_B4_Weights(WeightsEnum): class EfficientNet_B5_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ url="https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth", transforms=partial( ImageClassification, crop_size=456, resize_size=456, interpolation=InterpolationMode.BICUBIC @@ -565,6 +582,7 @@ class EfficientNet_B5_Weights(WeightsEnum): "acc@1": 83.444, "acc@5": 96.628, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -572,6 +590,7 @@ class EfficientNet_B5_Weights(WeightsEnum): class EfficientNet_B6_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ url="https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth", transforms=partial( ImageClassification, crop_size=528, resize_size=528, interpolation=InterpolationMode.BICUBIC @@ -583,6 +602,7 @@ class EfficientNet_B6_Weights(WeightsEnum): "acc@1": 84.008, "acc@5": 96.916, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -590,6 +610,7 @@ class EfficientNet_B6_Weights(WeightsEnum): class EfficientNet_B7_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ url="https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", transforms=partial( ImageClassification, crop_size=600, resize_size=600, interpolation=InterpolationMode.BICUBIC @@ -601,6 +622,7 @@ class EfficientNet_B7_Weights(WeightsEnum): "acc@1": 84.122, "acc@5": 96.908, }, + 
"_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -622,6 +644,11 @@ class EfficientNet_V2_S_Weights(WeightsEnum): "acc@1": 84.228, "acc@5": 96.878, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -643,12 +670,18 @@ class EfficientNet_V2_M_Weights(WeightsEnum): "acc@1": 85.112, "acc@5": 97.156, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 class EfficientNet_V2_L_Weights(WeightsEnum): + # Weights ported from https://github.com/google/automl/tree/master/efficientnetv2 IMAGENET1K_V1 = Weights( url="https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth", transforms=partial( @@ -666,6 +699,7 @@ class EfficientNet_V2_L_Weights(WeightsEnum): "acc@1": 85.808, "acc@5": 97.788, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -1036,13 +1070,11 @@ def efficientnet_v2_l( model_urls = _ModelURLs( { - # Weights ported from https://github.com/rwightman/pytorch-image-models/ "efficientnet_b0": EfficientNet_B0_Weights.IMAGENET1K_V1.url, "efficientnet_b1": EfficientNet_B1_Weights.IMAGENET1K_V1.url, "efficientnet_b2": EfficientNet_B2_Weights.IMAGENET1K_V1.url, "efficientnet_b3": EfficientNet_B3_Weights.IMAGENET1K_V1.url, "efficientnet_b4": EfficientNet_B4_Weights.IMAGENET1K_V1.url, - # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ "efficientnet_b5": EfficientNet_B5_Weights.IMAGENET1K_V1.url, "efficientnet_b6": EfficientNet_B6_Weights.IMAGENET1K_V1.url, "efficientnet_b7": EfficientNet_B7_Weights.IMAGENET1K_V1.url, diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py index 755740abc11..94923dd2a48 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -288,6 +288,7 @@ class GoogLeNet_Weights(WeightsEnum): "acc@1": 69.778, "acc@5": 89.530, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index 0abd195742c..b034d3aa79a 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -420,6 +420,7 @@ class Inception_V3_Weights(WeightsEnum): "acc@1": 77.294, "acc@5": 93.450, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index b1da02f4697..eb26a7ea5c0 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -229,6 +229,7 @@ class MNASNet0_5_Weights(WeightsEnum): "acc@1": 67.734, "acc@5": 87.490, }, + "_docs": """These weights reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -246,6 +247,10 @@ class MNASNet0_75_Weights(WeightsEnum): "acc@1": 71.180, "acc@5": 90.496, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V1 @@ -262,6 +267,7 @@ class MNASNet1_0_Weights(WeightsEnum): "acc@1": 73.456, "acc@5": 91.510, }, + "_docs": """These weights reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -279,6 +285,10 @@ class MNASNet1_3_Weights(WeightsEnum): "acc@1": 76.506, "acc@5": 93.522, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index d63ecb01f38..b27d305dc82 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -212,6 +212,7 @@ class MobileNet_V2_Weights(WeightsEnum): "acc@1": 71.878, "acc@5": 90.286, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -224,6 +225,11 @@ class MobileNet_V2_Weights(WeightsEnum): "acc@1": 72.154, "acc@5": 90.822, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index 787526cc1b3..465d4e58ac8 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -321,6 +321,7 @@ class MobileNet_V3_Large_Weights(WeightsEnum): "acc@1": 74.042, "acc@5": 91.340, }, + "_docs": """These weights were trained from scratch by using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -334,6 +335,11 @@ class MobileNet_V3_Large_Weights(WeightsEnum): "acc@1": 75.274, "acc@5": 92.566, }, + "_docs": """ + These weights improve marginally upon the results of the original paper by using a modified version of + TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -351,6 +357,9 @@ class MobileNet_V3_Small_Weights(WeightsEnum): "acc@1": 67.668, "acc@5": 87.402, }, + "_docs": """ + These weights improve upon the results of the original paper by using a simple training recipe. + """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index 2376d13ffe8..be80c9acf35 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -426,6 +426,7 @@ class RegNet_Y_400MF_Weights(WeightsEnum): "acc@1": 74.046, "acc@5": 91.716, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -439,6 +440,11 @@ class RegNet_Y_400MF_Weights(WeightsEnum): "acc@1": 75.804, "acc@5": 92.742, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -456,6 +462,7 @@ class RegNet_Y_800MF_Weights(WeightsEnum): "acc@1": 76.420, "acc@5": 93.136, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -469,6 +476,11 @@ class RegNet_Y_800MF_Weights(WeightsEnum): "acc@1": 78.828, "acc@5": 94.502, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -486,6 +498,7 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): "acc@1": 77.950, "acc@5": 93.966, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -499,6 +512,11 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): "acc@1": 80.876, "acc@5": 95.444, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -516,6 +534,7 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): "acc@1": 78.948, "acc@5": 94.576, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -529,6 +548,11 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): "acc@1": 81.982, "acc@5": 95.972, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -546,6 +570,7 @@ class RegNet_Y_8GF_Weights(WeightsEnum): "acc@1": 80.032, "acc@5": 95.048, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -559,6 +584,11 @@ class RegNet_Y_8GF_Weights(WeightsEnum): "acc@1": 82.828, "acc@5": 96.330, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -576,6 +606,7 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 80.424, "acc@5": 95.240, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -589,6 +620,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 82.886, "acc@5": 96.328, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -603,6 +639,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 86.012, "acc@5": 98.054, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -618,6 +658,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 83.976, "acc@5": 97.244, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -635,6 +679,7 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 80.878, "acc@5": 95.340, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -648,6 +693,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 83.368, "acc@5": 96.498, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -662,6 +712,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 86.838, "acc@5": 98.362, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. 
+ """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -677,6 +731,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 84.622, "acc@5": 97.480, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -695,6 +753,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum): "acc@1": 88.228, "acc@5": 98.682, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -710,6 +772,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum): "acc@1": 86.068, "acc@5": 97.844, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_SWAG_E2E_V1 @@ -727,6 +793,7 @@ class RegNet_X_400MF_Weights(WeightsEnum): "acc@1": 72.834, "acc@5": 90.950, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -740,6 +807,11 @@ class RegNet_X_400MF_Weights(WeightsEnum): "acc@1": 74.864, "acc@5": 92.322, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -757,6 +829,7 @@ class RegNet_X_800MF_Weights(WeightsEnum): "acc@1": 75.212, "acc@5": 92.348, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -770,6 +843,11 @@ class RegNet_X_800MF_Weights(WeightsEnum): "acc@1": 77.522, "acc@5": 93.826, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -787,6 +865,7 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): "acc@1": 77.040, "acc@5": 93.440, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -800,6 +879,11 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): "acc@1": 79.668, "acc@5": 94.922, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -817,6 +901,7 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): "acc@1": 78.364, "acc@5": 93.992, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -830,6 +915,11 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): "acc@1": 81.196, "acc@5": 95.430, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -847,6 +937,7 @@ class RegNet_X_8GF_Weights(WeightsEnum): "acc@1": 79.344, "acc@5": 94.686, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -860,6 +951,11 @@ class RegNet_X_8GF_Weights(WeightsEnum): "acc@1": 81.682, "acc@5": 95.678, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -877,6 +973,7 @@ class RegNet_X_16GF_Weights(WeightsEnum): "acc@1": 80.058, "acc@5": 94.944, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -890,6 +987,11 @@ class RegNet_X_16GF_Weights(WeightsEnum): "acc@1": 82.716, "acc@5": 96.196, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -907,6 +1009,7 @@ class RegNet_X_32GF_Weights(WeightsEnum): "acc@1": 80.622, "acc@5": 95.248, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -920,6 +1023,11 @@ class RegNet_X_32GF_Weights(WeightsEnum): "acc@1": 83.014, "acc@5": 96.288, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index e8ed33d5080..b46c07ef600 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -321,6 +321,7 @@ class ResNet18_Weights(WeightsEnum): "acc@1": 69.758, "acc@5": 89.078, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -338,6 +339,7 @@ class ResNet34_Weights(WeightsEnum): "acc@1": 73.314, "acc@5": 91.420, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -355,9 +357,7 @@ class ResNet50_Weights(WeightsEnum): "acc@1": 76.130, "acc@5": 92.862, }, - "_docs": """ - These are standard weights using the basic recipe of the paper. - """, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -372,8 +372,8 @@ class ResNet50_Weights(WeightsEnum): "acc@5": 95.434, }, "_docs": """ - These are improved weights, using TorchVision's `new recipe - `_. + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. """, }, ) @@ -392,6 +392,7 @@ class ResNet101_Weights(WeightsEnum): "acc@1": 77.374, "acc@5": 93.546, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -405,6 +406,10 @@ class ResNet101_Weights(WeightsEnum): "acc@1": 81.886, "acc@5": 95.780, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -422,6 +427,7 @@ class ResNet152_Weights(WeightsEnum): "acc@1": 78.312, "acc@5": 94.046, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -435,6 +441,10 @@ class ResNet152_Weights(WeightsEnum): "acc@1": 82.284, "acc@5": 96.002, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -452,6 +462,7 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): "acc@1": 77.618, "acc@5": 93.698, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -465,6 +476,10 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): "acc@1": 81.198, "acc@5": 95.340, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -482,6 +497,7 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): "acc@1": 79.312, "acc@5": 94.526, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -495,6 +511,10 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): "acc@1": 82.834, "acc@5": 96.228, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -509,10 +529,13 @@ class ResNeXt101_64X4D_Weights(WeightsEnum): "num_params": 83455272, "recipe": "https://github.com/pytorch/vision/pull/5935", "metrics": { - # Mock "acc@1": 83.246, "acc@5": 96.454, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -530,6 +553,7 @@ class Wide_ResNet50_2_Weights(WeightsEnum): "acc@1": 78.468, "acc@5": 94.086, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -543,6 +567,10 @@ class Wide_ResNet50_2_Weights(WeightsEnum): "acc@1": 81.602, "acc@5": 95.758, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -560,6 +588,7 @@ class Wide_ResNet101_2_Weights(WeightsEnum): "acc@1": 78.848, "acc@5": 94.284, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -573,6 +602,10 @@ class Wide_ResNet101_2_Weights(WeightsEnum): "acc@1": 82.510, "acc@5": 96.020, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 3a314c867ca..4e163573655 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -186,12 +186,13 @@ def _shufflenetv2( _COMMON_META = { "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, - "recipe": "https://github.com/barrh/Shufflenet-v2-Pytorch/tree/v0.1.0", + "recipe": "https://github.com/ericsun99/Shufflenet-v2-Pytorch", } class ShuffleNet_V2_X0_5_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/ericsun99/Shufflenet-v2-Pytorch url="https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth", transforms=partial(ImageClassification, crop_size=224), meta={ @@ -201,6 +202,7 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): "acc@1": 60.552, "acc@5": 81.746, }, + "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -208,6 +210,7 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): class ShuffleNet_V2_X1_0_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/ericsun99/Shufflenet-v2-Pytorch url="https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth", transforms=partial(ImageClassification, crop_size=224), meta={ @@ -217,6 +220,7 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum): "acc@1": 69.362, "acc@5": 88.316, }, + "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -234,6 +238,10 @@ class ShuffleNet_V2_X1_5_Weights(WeightsEnum): "acc@1": 72.996, "acc@5": 91.086, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -251,6 +259,10 @@ class ShuffleNet_V2_X2_0_Weights(WeightsEnum): "acc@1": 76.230, "acc@5": 93.006, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py index dd474cbe7f7..a93a06fc80d 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -117,6 +117,7 @@ def _squeezenet( _COMMON_META = { "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/pull/49#issuecomment-277560717", + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", } diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index 455397c8403..f60db5096a7 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -416,11 +416,12 @@ class Swin_T_Weights(WeightsEnum): **_COMMON_META, "num_params": 28288354, "min_size": (224, 224), - "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swin_t", + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", "metrics": { "acc@1": 81.358, "acc@5": 95.526, }, + "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index 6b82f90d58b..9fb77d9bf97 100644 --- a/torchvision/models/vgg.py +++ b/torchvision/models/vgg.py @@ -110,6 +110,7 @@ def _vgg(cfg: str, batch_norm: bool, weights: Optional[WeightsEnum], progress: b "min_size": (32, 32), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", + "_docs": """These weights were trained from scratch by using a simplified training recipe.""", } @@ -190,10 +191,8 @@ class VGG16_Weights(WeightsEnum): }, }, ) - # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the - # same input standardization method as the paper. Only the `features` weights have proper values, those on the - # `classifier` module are filled with nans. IMAGENET1K_FEATURES = Weights( + # Weights ported from https://github.com/amdegroot/ssd.pytorch/ url="https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth", transforms=partial( ImageClassification, @@ -210,6 +209,11 @@ class VGG16_Weights(WeightsEnum): "acc@1": float("nan"), "acc@5": float("nan"), }, + "_docs": """ + These weights can't be used for classification because they are missing values in the `classifier` + module. Only the `features` module has valid values and can be used for feature extraction. The weights + were trained using the original input standardization method as described in the paper. + """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index e672721be65..dc54b8735f0 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -332,6 +332,10 @@ class ViT_B_16_Weights(WeightsEnum): "acc@1": 81.072, "acc@5": 95.318, }, + "_docs": """ + These weights were trained from scratch by using a modified version of `DeIT + `_'s training recipe. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -350,6 +354,10 @@ class ViT_B_16_Weights(WeightsEnum): "acc@1": 85.304, "acc@5": 97.650, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. 
+ """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -369,6 +377,10 @@ class ViT_B_16_Weights(WeightsEnum): "acc@1": 81.886, "acc@5": 96.180, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -387,6 +399,10 @@ class ViT_B_32_Weights(WeightsEnum): "acc@1": 75.912, "acc@5": 92.466, }, + "_docs": """ + These weights were trained from scratch by using a modified version of `DeIT + `_'s training recipe. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -405,6 +421,11 @@ class ViT_L_16_Weights(WeightsEnum): "acc@1": 79.662, "acc@5": 94.638, }, + "_docs": """ + These weights were trained from scratch by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -423,6 +444,10 @@ class ViT_L_16_Weights(WeightsEnum): "acc@1": 88.064, "acc@5": 98.512, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -442,6 +467,10 @@ class ViT_L_16_Weights(WeightsEnum): "acc@1": 85.146, "acc@5": 97.422, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -460,6 +489,10 @@ class ViT_L_32_Weights(WeightsEnum): "acc@1": 76.972, "acc@5": 93.07, }, + "_docs": """ + These weights were trained from scratch by using a modified version of `DeIT + `_'s training recipe. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -482,6 +515,10 @@ class ViT_H_14_Weights(WeightsEnum): "acc@1": 88.552, "acc@5": 98.694, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -501,6 +538,10 @@ class ViT_H_14_Weights(WeightsEnum): "acc@1": 85.708, "acc@5": 97.730, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_SWAG_E2E_V1