diff --git a/docs/source/conf.py b/docs/source/conf.py
index bedef5a5215..db7b2ef14a2 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -335,8 +335,6 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
for field in obj:
lines += [f"**{str(field)}**:", ""]
- if field == obj.DEFAULT:
- lines += [f"This weight is also available as ``{obj.__name__}.DEFAULT``.", ""]
table = []
@@ -349,7 +347,12 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
custom_docs = meta_with_metrics.pop("_docs", None) # Custom per-Weights docs
if custom_docs is not None:
- lines += [custom_docs, ""]
+ lines += [custom_docs]
+
+ if field == obj.DEFAULT:
+ lines += [f"Also available as ``{obj.__name__}.DEFAULT``."]
+
+ lines += [""]
for k, v in meta_with_metrics.items():
if k in {"recipe", "license"}:
@@ -367,8 +370,8 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
lines += textwrap.indent(table, " " * 4).split("\n")
lines.append("")
lines.append(
- f"The preprocessing/inference transforms are available at ``{str(field)}.transforms`` and "
- f"perform the following operations: {field.transforms().describe()}"
+ f"The inference transforms are available at ``{str(field)}.transforms`` and "
+ f"perform the following preprocessing operations: {field.transforms().describe()}"
)
lines.append("")
diff --git a/test/test_extended_models.py b/test/test_extended_models.py
index a39ca62ca78..e3f79e28af4 100644
--- a/test/test_extended_models.py
+++ b/test/test_extended_models.py
@@ -96,7 +96,7 @@ def test_schema_meta_validation(model_fn):
classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
defaults = {
"all": {"metrics", "min_size", "num_params", "recipe"},
- "models": classification_fields,
+ "models": classification_fields | {"_docs"},
"detection": {"categories", ("metrics", "box_map")},
"quantization": classification_fields | {"backend", "unquantized"},
"segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},
diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py
index dff0bbad1a4..733928bb5e3 100644
--- a/torchvision/models/alexnet.py
+++ b/torchvision/models/alexnet.py
@@ -65,6 +65,9 @@ class AlexNet_Weights(WeightsEnum):
"acc@1": 56.522,
"acc@5": 79.066,
},
+ "_docs": """
+ These weights reproduce closely the results of the paper using a simplified training recipe.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
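Note: with the conf.py change above, the AlexNet entry would render roughly as sketched below (illustrative only; the exact output also includes the metadata table built by the rest of ``inject_weight_metadata``):

    # **AlexNet_Weights.IMAGENET1K_V1**:
    #
    # These weights reproduce closely the results of the paper using a simplified training recipe.
    # Also available as ``AlexNet_Weights.DEFAULT``.
    #
    # <metadata table: num_params, min_size, recipe, acc@1, acc@5>
    #
    # The inference transforms are available at ``AlexNet_Weights.IMAGENET1K_V1.transforms`` and
    # perform the following preprocessing operations: ...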
diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py
index f2c7ee1e03a..f9aa8d3b1c9 100644
--- a/torchvision/models/convnext.py
+++ b/torchvision/models/convnext.py
@@ -207,6 +207,11 @@ def _convnext(
"min_size": (32, 32),
"categories": _IMAGENET_CATEGORIES,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#convnext",
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+            <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
}
diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py
index 47447991f0e..fc354a0c437 100644
--- a/torchvision/models/densenet.py
+++ b/torchvision/models/densenet.py
@@ -261,6 +261,7 @@ def _densenet(
"min_size": (29, 29),
"categories": _IMAGENET_CATEGORIES,
"recipe": "https://github.com/pytorch/vision/pull/116",
+ "_docs": """These weights are ported from LuaTorch.""",
}
diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index be166140209..4b911dbfaba 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -431,24 +431,26 @@ def _efficientnet_conf(
_COMMON_META: Dict[str, Any] = {
"categories": _IMAGENET_CATEGORIES,
- "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet",
}
_COMMON_META_V1 = {
**_COMMON_META,
"min_size": (1, 1),
+ "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v1",
}
_COMMON_META_V2 = {
**_COMMON_META,
"min_size": (33, 33),
+ "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v2",
}
class EfficientNet_B0_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/rwightman/pytorch-image-models/
url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth",
transforms=partial(
ImageClassification, crop_size=224, resize_size=256, interpolation=InterpolationMode.BICUBIC
@@ -460,6 +462,7 @@ class EfficientNet_B0_Weights(WeightsEnum):
"acc@1": 77.692,
"acc@5": 93.532,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -467,6 +470,7 @@ class EfficientNet_B0_Weights(WeightsEnum):
class EfficientNet_B1_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/rwightman/pytorch-image-models/
url="https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth",
transforms=partial(
ImageClassification, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC
@@ -478,6 +482,7 @@ class EfficientNet_B1_Weights(WeightsEnum):
"acc@1": 78.642,
"acc@5": 94.186,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -493,6 +498,11 @@ class EfficientNet_B1_Weights(WeightsEnum):
"acc@1": 79.838,
"acc@5": 94.934,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -500,6 +510,7 @@ class EfficientNet_B1_Weights(WeightsEnum):
class EfficientNet_B2_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/rwightman/pytorch-image-models/
url="https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth",
transforms=partial(
ImageClassification, crop_size=288, resize_size=288, interpolation=InterpolationMode.BICUBIC
@@ -511,6 +522,7 @@ class EfficientNet_B2_Weights(WeightsEnum):
"acc@1": 80.608,
"acc@5": 95.310,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -518,6 +530,7 @@ class EfficientNet_B2_Weights(WeightsEnum):
class EfficientNet_B3_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/rwightman/pytorch-image-models/
url="https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth",
transforms=partial(
ImageClassification, crop_size=300, resize_size=320, interpolation=InterpolationMode.BICUBIC
@@ -529,6 +542,7 @@ class EfficientNet_B3_Weights(WeightsEnum):
"acc@1": 82.008,
"acc@5": 96.054,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -536,6 +550,7 @@ class EfficientNet_B3_Weights(WeightsEnum):
class EfficientNet_B4_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/rwightman/pytorch-image-models/
url="https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth",
transforms=partial(
ImageClassification, crop_size=380, resize_size=384, interpolation=InterpolationMode.BICUBIC
@@ -547,6 +562,7 @@ class EfficientNet_B4_Weights(WeightsEnum):
"acc@1": 83.384,
"acc@5": 96.594,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -554,6 +570,7 @@ class EfficientNet_B4_Weights(WeightsEnum):
class EfficientNet_B5_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
url="https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
transforms=partial(
ImageClassification, crop_size=456, resize_size=456, interpolation=InterpolationMode.BICUBIC
@@ -565,6 +582,7 @@ class EfficientNet_B5_Weights(WeightsEnum):
"acc@1": 83.444,
"acc@5": 96.628,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -572,6 +590,7 @@ class EfficientNet_B5_Weights(WeightsEnum):
class EfficientNet_B6_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
url="https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
transforms=partial(
ImageClassification, crop_size=528, resize_size=528, interpolation=InterpolationMode.BICUBIC
@@ -583,6 +602,7 @@ class EfficientNet_B6_Weights(WeightsEnum):
"acc@1": 84.008,
"acc@5": 96.916,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -590,6 +610,7 @@ class EfficientNet_B6_Weights(WeightsEnum):
class EfficientNet_B7_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
url="https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
transforms=partial(
ImageClassification, crop_size=600, resize_size=600, interpolation=InterpolationMode.BICUBIC
@@ -601,6 +622,7 @@ class EfficientNet_B7_Weights(WeightsEnum):
"acc@1": 84.122,
"acc@5": 96.908,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -622,6 +644,11 @@ class EfficientNet_V2_S_Weights(WeightsEnum):
"acc@1": 84.228,
"acc@5": 96.878,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -643,12 +670,18 @@ class EfficientNet_V2_M_Weights(WeightsEnum):
"acc@1": 85.112,
"acc@5": 97.156,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
class EfficientNet_V2_L_Weights(WeightsEnum):
+ # Weights ported from https://github.com/google/automl/tree/master/efficientnetv2
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth",
transforms=partial(
@@ -666,6 +699,7 @@ class EfficientNet_V2_L_Weights(WeightsEnum):
"acc@1": 85.808,
"acc@5": 97.788,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -1036,13 +1070,11 @@ def efficientnet_v2_l(
model_urls = _ModelURLs(
{
- # Weights ported from https://github.com/rwightman/pytorch-image-models/
"efficientnet_b0": EfficientNet_B0_Weights.IMAGENET1K_V1.url,
"efficientnet_b1": EfficientNet_B1_Weights.IMAGENET1K_V1.url,
"efficientnet_b2": EfficientNet_B2_Weights.IMAGENET1K_V1.url,
"efficientnet_b3": EfficientNet_B3_Weights.IMAGENET1K_V1.url,
"efficientnet_b4": EfficientNet_B4_Weights.IMAGENET1K_V1.url,
- # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
"efficientnet_b5": EfficientNet_B5_Weights.IMAGENET1K_V1.url,
"efficientnet_b6": EfficientNet_B6_Weights.IMAGENET1K_V1.url,
"efficientnet_b7": EfficientNet_B7_Weights.IMAGENET1K_V1.url,
diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py
index 755740abc11..94923dd2a48 100644
--- a/torchvision/models/googlenet.py
+++ b/torchvision/models/googlenet.py
@@ -288,6 +288,7 @@ class GoogLeNet_Weights(WeightsEnum):
"acc@1": 69.778,
"acc@5": 89.530,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py
index 0abd195742c..b034d3aa79a 100644
--- a/torchvision/models/inception.py
+++ b/torchvision/models/inception.py
@@ -420,6 +420,7 @@ class Inception_V3_Weights(WeightsEnum):
"acc@1": 77.294,
"acc@5": 93.450,
},
+ "_docs": """These weights are ported from the original paper.""",
},
)
DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py
index b1da02f4697..eb26a7ea5c0 100644
--- a/torchvision/models/mnasnet.py
+++ b/torchvision/models/mnasnet.py
@@ -229,6 +229,7 @@ class MNASNet0_5_Weights(WeightsEnum):
"acc@1": 67.734,
"acc@5": 87.490,
},
+ "_docs": """These weights reproduce closely the results of the paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -246,6 +247,10 @@ class MNASNet0_75_Weights(WeightsEnum):
"acc@1": 71.180,
"acc@5": 90.496,
},
+ "_docs": """
+ These weights were trained from scratch by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -262,6 +267,7 @@ class MNASNet1_0_Weights(WeightsEnum):
"acc@1": 73.456,
"acc@5": 91.510,
},
+ "_docs": """These weights reproduce closely the results of the paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -279,6 +285,10 @@ class MNASNet1_3_Weights(WeightsEnum):
"acc@1": 76.506,
"acc@5": 93.522,
},
+ "_docs": """
+ These weights were trained from scratch by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py
index d63ecb01f38..b27d305dc82 100644
--- a/torchvision/models/mobilenetv2.py
+++ b/torchvision/models/mobilenetv2.py
@@ -212,6 +212,7 @@ class MobileNet_V2_Weights(WeightsEnum):
"acc@1": 71.878,
"acc@5": 90.286,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -224,6 +225,11 @@ class MobileNet_V2_Weights(WeightsEnum):
"acc@1": 72.154,
"acc@5": 90.822,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py
index 787526cc1b3..465d4e58ac8 100644
--- a/torchvision/models/mobilenetv3.py
+++ b/torchvision/models/mobilenetv3.py
@@ -321,6 +321,7 @@ class MobileNet_V3_Large_Weights(WeightsEnum):
"acc@1": 74.042,
"acc@5": 91.340,
},
+ "_docs": """These weights were trained from scratch by using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -334,6 +335,11 @@ class MobileNet_V3_Large_Weights(WeightsEnum):
"acc@1": 75.274,
"acc@5": 92.566,
},
+ "_docs": """
+ These weights improve marginally upon the results of the original paper by using a modified version of
+ TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -351,6 +357,9 @@ class MobileNet_V3_Small_Weights(WeightsEnum):
"acc@1": 67.668,
"acc@5": 87.402,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a simple training recipe.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py
index 2376d13ffe8..be80c9acf35 100644
--- a/torchvision/models/regnet.py
+++ b/torchvision/models/regnet.py
@@ -426,6 +426,7 @@ class RegNet_Y_400MF_Weights(WeightsEnum):
"acc@1": 74.046,
"acc@5": 91.716,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -439,6 +440,11 @@ class RegNet_Y_400MF_Weights(WeightsEnum):
"acc@1": 75.804,
"acc@5": 92.742,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -456,6 +462,7 @@ class RegNet_Y_800MF_Weights(WeightsEnum):
"acc@1": 76.420,
"acc@5": 93.136,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -469,6 +476,11 @@ class RegNet_Y_800MF_Weights(WeightsEnum):
"acc@1": 78.828,
"acc@5": 94.502,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -486,6 +498,7 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum):
"acc@1": 77.950,
"acc@5": 93.966,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -499,6 +512,11 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum):
"acc@1": 80.876,
"acc@5": 95.444,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -516,6 +534,7 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum):
"acc@1": 78.948,
"acc@5": 94.576,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -529,6 +548,11 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum):
"acc@1": 81.982,
"acc@5": 95.972,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -546,6 +570,7 @@ class RegNet_Y_8GF_Weights(WeightsEnum):
"acc@1": 80.032,
"acc@5": 95.048,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -559,6 +584,11 @@ class RegNet_Y_8GF_Weights(WeightsEnum):
"acc@1": 82.828,
"acc@5": 96.330,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -576,6 +606,7 @@ class RegNet_Y_16GF_Weights(WeightsEnum):
"acc@1": 80.424,
"acc@5": 95.240,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -589,6 +620,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum):
"acc@1": 82.886,
"acc@5": 96.328,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
IMAGENET1K_SWAG_E2E_V1 = Weights(
@@ -603,6 +639,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum):
"acc@1": 86.012,
"acc@5": 98.054,
},
+ "_docs": """
+ These weights are learnt via transfer learning by end-to-end fine-tuning the original
+                `SWAG <https://github.com/facebookresearch/SWAG>`_ weights on ImageNet-1K data.
+ """,
},
)
IMAGENET1K_SWAG_LINEAR_V1 = Weights(
@@ -618,6 +658,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum):
"acc@1": 83.976,
"acc@5": 97.244,
},
+ "_docs": """
+                These weights are composed of the original frozen `SWAG <https://github.com/facebookresearch/SWAG>`_ trunk
+ weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -635,6 +679,7 @@ class RegNet_Y_32GF_Weights(WeightsEnum):
"acc@1": 80.878,
"acc@5": 95.340,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -648,6 +693,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum):
"acc@1": 83.368,
"acc@5": 96.498,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
IMAGENET1K_SWAG_E2E_V1 = Weights(
@@ -662,6 +712,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum):
"acc@1": 86.838,
"acc@5": 98.362,
},
+ "_docs": """
+ These weights are learnt via transfer learning by end-to-end fine-tuning the original
+                `SWAG <https://github.com/facebookresearch/SWAG>`_ weights on ImageNet-1K data.
+ """,
},
)
IMAGENET1K_SWAG_LINEAR_V1 = Weights(
@@ -677,6 +731,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum):
"acc@1": 84.622,
"acc@5": 97.480,
},
+ "_docs": """
+                These weights are composed of the original frozen `SWAG <https://github.com/facebookresearch/SWAG>`_ trunk
+ weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -695,6 +753,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum):
"acc@1": 88.228,
"acc@5": 98.682,
},
+ "_docs": """
+ These weights are learnt via transfer learning by end-to-end fine-tuning the original
+                `SWAG <https://github.com/facebookresearch/SWAG>`_ weights on ImageNet-1K data.
+ """,
},
)
IMAGENET1K_SWAG_LINEAR_V1 = Weights(
@@ -710,6 +772,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum):
"acc@1": 86.068,
"acc@5": 97.844,
},
+ "_docs": """
+                These weights are composed of the original frozen `SWAG <https://github.com/facebookresearch/SWAG>`_ trunk
+ weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
+ """,
},
)
DEFAULT = IMAGENET1K_SWAG_E2E_V1
@@ -727,6 +793,7 @@ class RegNet_X_400MF_Weights(WeightsEnum):
"acc@1": 72.834,
"acc@5": 90.950,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -740,6 +807,11 @@ class RegNet_X_400MF_Weights(WeightsEnum):
"acc@1": 74.864,
"acc@5": 92.322,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -757,6 +829,7 @@ class RegNet_X_800MF_Weights(WeightsEnum):
"acc@1": 75.212,
"acc@5": 92.348,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -770,6 +843,11 @@ class RegNet_X_800MF_Weights(WeightsEnum):
"acc@1": 77.522,
"acc@5": 93.826,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -787,6 +865,7 @@ class RegNet_X_1_6GF_Weights(WeightsEnum):
"acc@1": 77.040,
"acc@5": 93.440,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -800,6 +879,11 @@ class RegNet_X_1_6GF_Weights(WeightsEnum):
"acc@1": 79.668,
"acc@5": 94.922,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -817,6 +901,7 @@ class RegNet_X_3_2GF_Weights(WeightsEnum):
"acc@1": 78.364,
"acc@5": 93.992,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -830,6 +915,11 @@ class RegNet_X_3_2GF_Weights(WeightsEnum):
"acc@1": 81.196,
"acc@5": 95.430,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -847,6 +937,7 @@ class RegNet_X_8GF_Weights(WeightsEnum):
"acc@1": 79.344,
"acc@5": 94.686,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -860,6 +951,11 @@ class RegNet_X_8GF_Weights(WeightsEnum):
"acc@1": 81.682,
"acc@5": 95.678,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -877,6 +973,7 @@ class RegNet_X_16GF_Weights(WeightsEnum):
"acc@1": 80.058,
"acc@5": 94.944,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -890,6 +987,11 @@ class RegNet_X_16GF_Weights(WeightsEnum):
"acc@1": 82.716,
"acc@5": 96.196,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -907,6 +1009,7 @@ class RegNet_X_32GF_Weights(WeightsEnum):
"acc@1": 80.622,
"acc@5": 95.248,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -920,6 +1023,11 @@ class RegNet_X_32GF_Weights(WeightsEnum):
"acc@1": 83.014,
"acc@5": 96.288,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py
index e8ed33d5080..b46c07ef600 100644
--- a/torchvision/models/resnet.py
+++ b/torchvision/models/resnet.py
@@ -321,6 +321,7 @@ class ResNet18_Weights(WeightsEnum):
"acc@1": 69.758,
"acc@5": 89.078,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -338,6 +339,7 @@ class ResNet34_Weights(WeightsEnum):
"acc@1": 73.314,
"acc@5": 91.420,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -355,9 +357,7 @@ class ResNet50_Weights(WeightsEnum):
"acc@1": 76.130,
"acc@5": 92.862,
},
- "_docs": """
- These are standard weights using the basic recipe of the paper.
- """,
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -372,8 +372,8 @@ class ResNet50_Weights(WeightsEnum):
"acc@5": 95.434,
},
"_docs": """
- These are improved weights, using TorchVision's `new recipe
- `_.
+ These weights improve upon the results of the original paper by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
@@ -392,6 +392,7 @@ class ResNet101_Weights(WeightsEnum):
"acc@1": 77.374,
"acc@5": 93.546,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -405,6 +406,10 @@ class ResNet101_Weights(WeightsEnum):
"acc@1": 81.886,
"acc@5": 95.780,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -422,6 +427,7 @@ class ResNet152_Weights(WeightsEnum):
"acc@1": 78.312,
"acc@5": 94.046,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -435,6 +441,10 @@ class ResNet152_Weights(WeightsEnum):
"acc@1": 82.284,
"acc@5": 96.002,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -452,6 +462,7 @@ class ResNeXt50_32X4D_Weights(WeightsEnum):
"acc@1": 77.618,
"acc@5": 93.698,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -465,6 +476,10 @@ class ResNeXt50_32X4D_Weights(WeightsEnum):
"acc@1": 81.198,
"acc@5": 95.340,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -482,6 +497,7 @@ class ResNeXt101_32X8D_Weights(WeightsEnum):
"acc@1": 79.312,
"acc@5": 94.526,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -495,6 +511,10 @@ class ResNeXt101_32X8D_Weights(WeightsEnum):
"acc@1": 82.834,
"acc@5": 96.228,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -509,10 +529,13 @@ class ResNeXt101_64X4D_Weights(WeightsEnum):
"num_params": 83455272,
"recipe": "https://github.com/pytorch/vision/pull/5935",
"metrics": {
- # Mock
"acc@1": 83.246,
"acc@5": 96.454,
},
+ "_docs": """
+ These weights were trained from scratch by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -530,6 +553,7 @@ class Wide_ResNet50_2_Weights(WeightsEnum):
"acc@1": 78.468,
"acc@5": 94.086,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -543,6 +567,10 @@ class Wide_ResNet50_2_Weights(WeightsEnum):
"acc@1": 81.602,
"acc@5": 95.758,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
@@ -560,6 +588,7 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
"acc@1": 78.848,
"acc@5": 94.284,
},
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
@@ -573,6 +602,10 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
"acc@1": 82.510,
"acc@5": 96.020,
},
+ "_docs": """
+ These weights improve upon the results of the original paper by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V2
diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py
index 3a314c867ca..4e163573655 100644
--- a/torchvision/models/shufflenetv2.py
+++ b/torchvision/models/shufflenetv2.py
@@ -186,12 +186,13 @@ def _shufflenetv2(
_COMMON_META = {
"min_size": (1, 1),
"categories": _IMAGENET_CATEGORIES,
- "recipe": "https://github.com/barrh/Shufflenet-v2-Pytorch/tree/v0.1.0",
+ "recipe": "https://github.com/ericsun99/Shufflenet-v2-Pytorch",
}
class ShuffleNet_V2_X0_5_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/ericsun99/Shufflenet-v2-Pytorch
url="https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
@@ -201,6 +202,7 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum):
"acc@1": 60.552,
"acc@5": 81.746,
},
+ "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -208,6 +210,7 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum):
class ShuffleNet_V2_X1_0_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
+ # Weights ported from https://github.com/ericsun99/Shufflenet-v2-Pytorch
url="https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
@@ -217,6 +220,7 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum):
"acc@1": 69.362,
"acc@5": 88.316,
},
+ "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""",
},
)
DEFAULT = IMAGENET1K_V1
@@ -234,6 +238,10 @@ class ShuffleNet_V2_X1_5_Weights(WeightsEnum):
"acc@1": 72.996,
"acc@5": 91.086,
},
+ "_docs": """
+ These weights were trained from scratch by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -251,6 +259,10 @@ class ShuffleNet_V2_X2_0_Weights(WeightsEnum):
"acc@1": 76.230,
"acc@5": 93.006,
},
+ "_docs": """
+ These weights were trained from scratch by using TorchVision's `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py
index dd474cbe7f7..a93a06fc80d 100644
--- a/torchvision/models/squeezenet.py
+++ b/torchvision/models/squeezenet.py
@@ -117,6 +117,7 @@ def _squeezenet(
_COMMON_META = {
"categories": _IMAGENET_CATEGORIES,
"recipe": "https://github.com/pytorch/vision/pull/49#issuecomment-277560717",
+ "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
}
diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py
index 455397c8403..f60db5096a7 100644
--- a/torchvision/models/swin_transformer.py
+++ b/torchvision/models/swin_transformer.py
@@ -416,11 +416,12 @@ class Swin_T_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 28288354,
"min_size": (224, 224),
- "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swin_t",
+ "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer",
"metrics": {
"acc@1": 81.358,
"acc@5": 95.526,
},
+ "_docs": """These weights reproduce closely the results of the paper using its training recipe.""",
},
)
DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py
index 6b82f90d58b..9fb77d9bf97 100644
--- a/torchvision/models/vgg.py
+++ b/torchvision/models/vgg.py
@@ -110,6 +110,7 @@ def _vgg(cfg: str, batch_norm: bool, weights: Optional[WeightsEnum], progress: b
"min_size": (32, 32),
"categories": _IMAGENET_CATEGORIES,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg",
+ "_docs": """These weights were trained from scratch by using a simplified training recipe.""",
}
@@ -190,10 +191,8 @@ class VGG16_Weights(WeightsEnum):
},
},
)
- # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the
- # same input standardization method as the paper. Only the `features` weights have proper values, those on the
- # `classifier` module are filled with nans.
IMAGENET1K_FEATURES = Weights(
+ # Weights ported from https://github.com/amdegroot/ssd.pytorch/
url="https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth",
transforms=partial(
ImageClassification,
@@ -210,6 +209,11 @@ class VGG16_Weights(WeightsEnum):
"acc@1": float("nan"),
"acc@5": float("nan"),
},
+ "_docs": """
+ These weights can't be used for classification because they are missing values in the `classifier`
+ module. Only the `features` module has valid values and can be used for feature extraction. The weights
+ were trained using the original input standardization method as described in the paper.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py
index e672721be65..dc54b8735f0 100644
--- a/torchvision/models/vision_transformer.py
+++ b/torchvision/models/vision_transformer.py
@@ -332,6 +332,10 @@ class ViT_B_16_Weights(WeightsEnum):
"acc@1": 81.072,
"acc@5": 95.318,
},
+ "_docs": """
+ These weights were trained from scratch by using a modified version of `DeIT
+                <https://github.com/facebookresearch/deit>`_'s training recipe.
+ """,
},
)
IMAGENET1K_SWAG_E2E_V1 = Weights(
@@ -350,6 +354,10 @@ class ViT_B_16_Weights(WeightsEnum):
"acc@1": 85.304,
"acc@5": 97.650,
},
+ "_docs": """
+ These weights are learnt via transfer learning by end-to-end fine-tuning the original
+                `SWAG <https://github.com/facebookresearch/SWAG>`_ weights on ImageNet-1K data.
+ """,
},
)
IMAGENET1K_SWAG_LINEAR_V1 = Weights(
@@ -369,6 +377,10 @@ class ViT_B_16_Weights(WeightsEnum):
"acc@1": 81.886,
"acc@5": 96.180,
},
+ "_docs": """
+                These weights are composed of the original frozen `SWAG <https://github.com/facebookresearch/SWAG>`_ trunk
+ weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -387,6 +399,10 @@ class ViT_B_32_Weights(WeightsEnum):
"acc@1": 75.912,
"acc@5": 92.466,
},
+ "_docs": """
+ These weights were trained from scratch by using a modified version of `DeIT
+                <https://github.com/facebookresearch/deit>`_'s training recipe.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -405,6 +421,11 @@ class ViT_L_16_Weights(WeightsEnum):
"acc@1": 79.662,
"acc@5": 94.638,
},
+ "_docs": """
+ These weights were trained from scratch by using a modified version of TorchVision's
+ `new training recipe
+                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
+ """,
},
)
IMAGENET1K_SWAG_E2E_V1 = Weights(
@@ -423,6 +444,10 @@ class ViT_L_16_Weights(WeightsEnum):
"acc@1": 88.064,
"acc@5": 98.512,
},
+ "_docs": """
+ These weights are learnt via transfer learning by end-to-end fine-tuning the original
+                `SWAG <https://github.com/facebookresearch/SWAG>`_ weights on ImageNet-1K data.
+ """,
},
)
IMAGENET1K_SWAG_LINEAR_V1 = Weights(
@@ -442,6 +467,10 @@ class ViT_L_16_Weights(WeightsEnum):
"acc@1": 85.146,
"acc@5": 97.422,
},
+ "_docs": """
+                These weights are composed of the original frozen `SWAG <https://github.com/facebookresearch/SWAG>`_ trunk
+ weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -460,6 +489,10 @@ class ViT_L_32_Weights(WeightsEnum):
"acc@1": 76.972,
"acc@5": 93.07,
},
+ "_docs": """
+ These weights were trained from scratch by using a modified version of `DeIT
+                <https://github.com/facebookresearch/deit>`_'s training recipe.
+ """,
},
)
DEFAULT = IMAGENET1K_V1
@@ -482,6 +515,10 @@ class ViT_H_14_Weights(WeightsEnum):
"acc@1": 88.552,
"acc@5": 98.694,
},
+ "_docs": """
+ These weights are learnt via transfer learning by end-to-end fine-tuning the original
+                `SWAG <https://github.com/facebookresearch/SWAG>`_ weights on ImageNet-1K data.
+ """,
},
)
IMAGENET1K_SWAG_LINEAR_V1 = Weights(
@@ -501,6 +538,10 @@ class ViT_H_14_Weights(WeightsEnum):
"acc@1": 85.708,
"acc@5": 97.730,
},
+ "_docs": """
+                These weights are composed of the original frozen `SWAG <https://github.com/facebookresearch/SWAG>`_ trunk
+ weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
+ """,
},
)
DEFAULT = IMAGENET1K_SWAG_E2E_V1
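Note: beyond the rendered documentation, the new ``_docs`` entries are plain metadata and can be read programmatically. A small usage sketch (illustrative; any weights enum touched above behaves the same way):

    from torchvision.models import ResNet50_Weights

    w = ResNet50_Weights.IMAGENET1K_V2
    print(w.meta["_docs"])             # the free-form description added by this PR
    print(w.meta["metrics"]["acc@5"])  # 95.434, as listed in the resnet.py hunk above
    preprocess = w.transforms()        # the inference transforms referenced by conf.py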