diff --git a/docs/source/conf.py b/docs/source/conf.py
index bedef5a5215..db7b2ef14a2 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -335,8 +335,6 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
 
         for field in obj:
             lines += [f"**{str(field)}**:", ""]
-            if field == obj.DEFAULT:
-                lines += [f"This weight is also available as ``{obj.__name__}.DEFAULT``.", ""]
 
             table = []
 
@@ -349,7 +347,12 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
 
             custom_docs = meta_with_metrics.pop("_docs", None)  # Custom per-Weights docs
             if custom_docs is not None:
-                lines += [custom_docs, ""]
+                lines += [custom_docs]
+
+            if field == obj.DEFAULT:
+                lines += [f"Also available as ``{obj.__name__}.DEFAULT``."]
+
+            lines += [""]
 
             for k, v in meta_with_metrics.items():
                 if k in {"recipe", "license"}:
@@ -367,8 +370,8 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
             lines += textwrap.indent(table, " " * 4).split("\n")
             lines.append("")
             lines.append(
-                f"The preprocessing/inference transforms are available at ``{str(field)}.transforms`` and "
-                f"perform the following operations: {field.transforms().describe()}"
+                f"The inference transforms are available at ``{str(field)}.transforms`` and "
+                f"perform the following preprocessing operations: {field.transforms().describe()}"
             )
             lines.append("")
 
diff --git a/test/test_extended_models.py b/test/test_extended_models.py
index a39ca62ca78..e3f79e28af4 100644
--- a/test/test_extended_models.py
+++ b/test/test_extended_models.py
@@ -96,7 +96,7 @@ def test_schema_meta_validation(model_fn):
     classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
     defaults = {
         "all": {"metrics", "min_size", "num_params", "recipe"},
-        "models": classification_fields,
+        "models": classification_fields | {"_docs"},
         "detection": {"categories", ("metrics", "box_map")},
         "quantization": classification_fields | {"backend", "unquantized"},
         "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},
diff --git a/torchvision/models/alexnet.py b/torchvision/models/alexnet.py
index dff0bbad1a4..733928bb5e3 100644
--- a/torchvision/models/alexnet.py
+++ b/torchvision/models/alexnet.py
@@ -65,6 +65,9 @@ class AlexNet_Weights(WeightsEnum):
                 "acc@1": 56.522,
                 "acc@5": 79.066,
             },
+            "_docs": """
+                These weights reproduce closely the results of the paper using a simplified training recipe.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_V1
diff --git a/torchvision/models/convnext.py b/torchvision/models/convnext.py
index f2c7ee1e03a..f9aa8d3b1c9 100644
--- a/torchvision/models/convnext.py
+++ b/torchvision/models/convnext.py
@@ -207,6 +207,11 @@ def _convnext(
     "min_size": (32, 32),
     "categories": _IMAGENET_CATEGORIES,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#convnext",
+    "_docs": """
+        These weights improve upon the results of the original paper by using a modified version of TorchVision's
+        `new training recipe
+        `_.
+ """, } diff --git a/torchvision/models/densenet.py b/torchvision/models/densenet.py index 47447991f0e..fc354a0c437 100644 --- a/torchvision/models/densenet.py +++ b/torchvision/models/densenet.py @@ -261,6 +261,7 @@ def _densenet( "min_size": (29, 29), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/pull/116", + "_docs": """These weights are ported from LuaTorch.""", } diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index be166140209..4b911dbfaba 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -431,24 +431,26 @@ def _efficientnet_conf( _COMMON_META: Dict[str, Any] = { "categories": _IMAGENET_CATEGORIES, - "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet", } _COMMON_META_V1 = { **_COMMON_META, "min_size": (1, 1), + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v1", } _COMMON_META_V2 = { **_COMMON_META, "min_size": (33, 33), + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v2", } class EfficientNet_B0_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth", transforms=partial( ImageClassification, crop_size=224, resize_size=256, interpolation=InterpolationMode.BICUBIC @@ -460,6 +462,7 @@ class EfficientNet_B0_Weights(WeightsEnum): "acc@1": 77.692, "acc@5": 93.532, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -467,6 +470,7 @@ class EfficientNet_B0_Weights(WeightsEnum): class EfficientNet_B1_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth", transforms=partial( ImageClassification, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC @@ -478,6 +482,7 @@ class EfficientNet_B1_Weights(WeightsEnum): "acc@1": 78.642, "acc@5": 94.186, }, + "_docs": """These weights are ported from the original paper.""", }, ) IMAGENET1K_V2 = Weights( @@ -493,6 +498,11 @@ class EfficientNet_B1_Weights(WeightsEnum): "acc@1": 79.838, "acc@5": 94.934, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -500,6 +510,7 @@ class EfficientNet_B1_Weights(WeightsEnum): class EfficientNet_B2_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth", transforms=partial( ImageClassification, crop_size=288, resize_size=288, interpolation=InterpolationMode.BICUBIC @@ -511,6 +522,7 @@ class EfficientNet_B2_Weights(WeightsEnum): "acc@1": 80.608, "acc@5": 95.310, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -518,6 +530,7 @@ class EfficientNet_B2_Weights(WeightsEnum): class EfficientNet_B3_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth", transforms=partial( ImageClassification, crop_size=300, resize_size=320, interpolation=InterpolationMode.BICUBIC @@ -529,6 +542,7 @@ class EfficientNet_B3_Weights(WeightsEnum): "acc@1": 82.008, "acc@5": 96.054, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -536,6 +550,7 @@ class EfficientNet_B3_Weights(WeightsEnum): class EfficientNet_B4_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/rwightman/pytorch-image-models/ url="https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth", transforms=partial( ImageClassification, crop_size=380, resize_size=384, interpolation=InterpolationMode.BICUBIC @@ -547,6 +562,7 @@ class EfficientNet_B4_Weights(WeightsEnum): "acc@1": 83.384, "acc@5": 96.594, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -554,6 +570,7 @@ class EfficientNet_B4_Weights(WeightsEnum): class EfficientNet_B5_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ url="https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth", transforms=partial( ImageClassification, crop_size=456, resize_size=456, interpolation=InterpolationMode.BICUBIC @@ -565,6 +582,7 @@ class EfficientNet_B5_Weights(WeightsEnum): "acc@1": 83.444, "acc@5": 96.628, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -572,6 +590,7 @@ class EfficientNet_B5_Weights(WeightsEnum): class EfficientNet_B6_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ url="https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth", transforms=partial( ImageClassification, crop_size=528, resize_size=528, interpolation=InterpolationMode.BICUBIC @@ -583,6 +602,7 @@ class EfficientNet_B6_Weights(WeightsEnum): "acc@1": 84.008, "acc@5": 96.916, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -590,6 +610,7 @@ class EfficientNet_B6_Weights(WeightsEnum): class EfficientNet_B7_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ url="https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", transforms=partial( ImageClassification, crop_size=600, resize_size=600, interpolation=InterpolationMode.BICUBIC @@ -601,6 +622,7 @@ class EfficientNet_B7_Weights(WeightsEnum): "acc@1": 84.122, "acc@5": 96.908, }, + 
"_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -622,6 +644,11 @@ class EfficientNet_V2_S_Weights(WeightsEnum): "acc@1": 84.228, "acc@5": 96.878, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -643,12 +670,18 @@ class EfficientNet_V2_M_Weights(WeightsEnum): "acc@1": 85.112, "acc@5": 97.156, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 class EfficientNet_V2_L_Weights(WeightsEnum): + # Weights ported from https://github.com/google/automl/tree/master/efficientnetv2 IMAGENET1K_V1 = Weights( url="https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth", transforms=partial( @@ -666,6 +699,7 @@ class EfficientNet_V2_L_Weights(WeightsEnum): "acc@1": 85.808, "acc@5": 97.788, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -1036,13 +1070,11 @@ def efficientnet_v2_l( model_urls = _ModelURLs( { - # Weights ported from https://github.com/rwightman/pytorch-image-models/ "efficientnet_b0": EfficientNet_B0_Weights.IMAGENET1K_V1.url, "efficientnet_b1": EfficientNet_B1_Weights.IMAGENET1K_V1.url, "efficientnet_b2": EfficientNet_B2_Weights.IMAGENET1K_V1.url, "efficientnet_b3": EfficientNet_B3_Weights.IMAGENET1K_V1.url, "efficientnet_b4": EfficientNet_B4_Weights.IMAGENET1K_V1.url, - # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/ "efficientnet_b5": EfficientNet_B5_Weights.IMAGENET1K_V1.url, "efficientnet_b6": EfficientNet_B6_Weights.IMAGENET1K_V1.url, "efficientnet_b7": EfficientNet_B7_Weights.IMAGENET1K_V1.url, diff --git a/torchvision/models/googlenet.py b/torchvision/models/googlenet.py index 755740abc11..94923dd2a48 100644 --- a/torchvision/models/googlenet.py +++ b/torchvision/models/googlenet.py @@ -288,6 +288,7 @@ class GoogLeNet_Weights(WeightsEnum): "acc@1": 69.778, "acc@5": 89.530, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/inception.py b/torchvision/models/inception.py index 0abd195742c..b034d3aa79a 100644 --- a/torchvision/models/inception.py +++ b/torchvision/models/inception.py @@ -420,6 +420,7 @@ class Inception_V3_Weights(WeightsEnum): "acc@1": 77.294, "acc@5": 93.450, }, + "_docs": """These weights are ported from the original paper.""", }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/mnasnet.py b/torchvision/models/mnasnet.py index b1da02f4697..eb26a7ea5c0 100644 --- a/torchvision/models/mnasnet.py +++ b/torchvision/models/mnasnet.py @@ -229,6 +229,7 @@ class MNASNet0_5_Weights(WeightsEnum): "acc@1": 67.734, "acc@5": 87.490, }, + "_docs": """These weights reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -246,6 +247,10 @@ class MNASNet0_75_Weights(WeightsEnum): "acc@1": 71.180, "acc@5": 90.496, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V1 @@ -262,6 +267,7 @@ class MNASNet1_0_Weights(WeightsEnum): "acc@1": 73.456, "acc@5": 91.510, }, + "_docs": """These weights reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -279,6 +285,10 @@ class MNASNet1_3_Weights(WeightsEnum): "acc@1": 76.506, "acc@5": 93.522, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py index d63ecb01f38..b27d305dc82 100644 --- a/torchvision/models/mobilenetv2.py +++ b/torchvision/models/mobilenetv2.py @@ -212,6 +212,7 @@ class MobileNet_V2_Weights(WeightsEnum): "acc@1": 71.878, "acc@5": 90.286, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -224,6 +225,11 @@ class MobileNet_V2_Weights(WeightsEnum): "acc@1": 72.154, "acc@5": 90.822, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index 787526cc1b3..465d4e58ac8 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -321,6 +321,7 @@ class MobileNet_V3_Large_Weights(WeightsEnum): "acc@1": 74.042, "acc@5": 91.340, }, + "_docs": """These weights were trained from scratch by using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -334,6 +335,11 @@ class MobileNet_V3_Large_Weights(WeightsEnum): "acc@1": 75.274, "acc@5": 92.566, }, + "_docs": """ + These weights improve marginally upon the results of the original paper by using a modified version of + TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -351,6 +357,9 @@ class MobileNet_V3_Small_Weights(WeightsEnum): "acc@1": 67.668, "acc@5": 87.402, }, + "_docs": """ + These weights improve upon the results of the original paper by using a simple training recipe. + """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/regnet.py b/torchvision/models/regnet.py index 2376d13ffe8..be80c9acf35 100644 --- a/torchvision/models/regnet.py +++ b/torchvision/models/regnet.py @@ -426,6 +426,7 @@ class RegNet_Y_400MF_Weights(WeightsEnum): "acc@1": 74.046, "acc@5": 91.716, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -439,6 +440,11 @@ class RegNet_Y_400MF_Weights(WeightsEnum): "acc@1": 75.804, "acc@5": 92.742, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -456,6 +462,7 @@ class RegNet_Y_800MF_Weights(WeightsEnum): "acc@1": 76.420, "acc@5": 93.136, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -469,6 +476,11 @@ class RegNet_Y_800MF_Weights(WeightsEnum): "acc@1": 78.828, "acc@5": 94.502, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -486,6 +498,7 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): "acc@1": 77.950, "acc@5": 93.966, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -499,6 +512,11 @@ class RegNet_Y_1_6GF_Weights(WeightsEnum): "acc@1": 80.876, "acc@5": 95.444, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -516,6 +534,7 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): "acc@1": 78.948, "acc@5": 94.576, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -529,6 +548,11 @@ class RegNet_Y_3_2GF_Weights(WeightsEnum): "acc@1": 81.982, "acc@5": 95.972, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -546,6 +570,7 @@ class RegNet_Y_8GF_Weights(WeightsEnum): "acc@1": 80.032, "acc@5": 95.048, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -559,6 +584,11 @@ class RegNet_Y_8GF_Weights(WeightsEnum): "acc@1": 82.828, "acc@5": 96.330, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -576,6 +606,7 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 80.424, "acc@5": 95.240, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -589,6 +620,11 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 82.886, "acc@5": 96.328, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -603,6 +639,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 86.012, "acc@5": 98.054, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -618,6 +658,10 @@ class RegNet_Y_16GF_Weights(WeightsEnum): "acc@1": 83.976, "acc@5": 97.244, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -635,6 +679,7 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 80.878, "acc@5": 95.340, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -648,6 +693,11 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 83.368, "acc@5": 96.498, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -662,6 +712,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 86.838, "acc@5": 98.362, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. 
+ """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -677,6 +731,10 @@ class RegNet_Y_32GF_Weights(WeightsEnum): "acc@1": 84.622, "acc@5": 97.480, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -695,6 +753,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum): "acc@1": 88.228, "acc@5": 98.682, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -710,6 +772,10 @@ class RegNet_Y_128GF_Weights(WeightsEnum): "acc@1": 86.068, "acc@5": 97.844, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_SWAG_E2E_V1 @@ -727,6 +793,7 @@ class RegNet_X_400MF_Weights(WeightsEnum): "acc@1": 72.834, "acc@5": 90.950, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -740,6 +807,11 @@ class RegNet_X_400MF_Weights(WeightsEnum): "acc@1": 74.864, "acc@5": 92.322, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -757,6 +829,7 @@ class RegNet_X_800MF_Weights(WeightsEnum): "acc@1": 75.212, "acc@5": 92.348, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -770,6 +843,11 @@ class RegNet_X_800MF_Weights(WeightsEnum): "acc@1": 77.522, "acc@5": 93.826, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -787,6 +865,7 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): "acc@1": 77.040, "acc@5": 93.440, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -800,6 +879,11 @@ class RegNet_X_1_6GF_Weights(WeightsEnum): "acc@1": 79.668, "acc@5": 94.922, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -817,6 +901,7 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): "acc@1": 78.364, "acc@5": 93.992, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -830,6 +915,11 @@ class RegNet_X_3_2GF_Weights(WeightsEnum): "acc@1": 81.196, "acc@5": 95.430, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -847,6 +937,7 @@ class RegNet_X_8GF_Weights(WeightsEnum): "acc@1": 79.344, "acc@5": 94.686, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -860,6 +951,11 @@ class RegNet_X_8GF_Weights(WeightsEnum): "acc@1": 81.682, "acc@5": 95.678, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -877,6 +973,7 @@ class RegNet_X_16GF_Weights(WeightsEnum): "acc@1": 80.058, "acc@5": 94.944, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -890,6 +987,11 @@ class RegNet_X_16GF_Weights(WeightsEnum): "acc@1": 82.716, "acc@5": 96.196, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -907,6 +1009,7 @@ class RegNet_X_32GF_Weights(WeightsEnum): "acc@1": 80.622, "acc@5": 95.248, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -920,6 +1023,11 @@ class RegNet_X_32GF_Weights(WeightsEnum): "acc@1": 83.014, "acc@5": 96.288, }, + "_docs": """ + These weights improve upon the results of the original paper by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py index e8ed33d5080..b46c07ef600 100644 --- a/torchvision/models/resnet.py +++ b/torchvision/models/resnet.py @@ -321,6 +321,7 @@ class ResNet18_Weights(WeightsEnum): "acc@1": 69.758, "acc@5": 89.078, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -338,6 +339,7 @@ class ResNet34_Weights(WeightsEnum): "acc@1": 73.314, "acc@5": 91.420, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -355,9 +357,7 @@ class ResNet50_Weights(WeightsEnum): "acc@1": 76.130, "acc@5": 92.862, }, - "_docs": """ - These are standard weights using the basic recipe of the paper. - """, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -372,8 +372,8 @@ class ResNet50_Weights(WeightsEnum): "acc@5": 95.434, }, "_docs": """ - These are improved weights, using TorchVision's `new recipe - `_. + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. """, }, ) @@ -392,6 +392,7 @@ class ResNet101_Weights(WeightsEnum): "acc@1": 77.374, "acc@5": 93.546, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -405,6 +406,10 @@ class ResNet101_Weights(WeightsEnum): "acc@1": 81.886, "acc@5": 95.780, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -422,6 +427,7 @@ class ResNet152_Weights(WeightsEnum): "acc@1": 78.312, "acc@5": 94.046, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -435,6 +441,10 @@ class ResNet152_Weights(WeightsEnum): "acc@1": 82.284, "acc@5": 96.002, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 @@ -452,6 +462,7 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): "acc@1": 77.618, "acc@5": 93.698, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -465,6 +476,10 @@ class ResNeXt50_32X4D_Weights(WeightsEnum): "acc@1": 81.198, "acc@5": 95.340, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -482,6 +497,7 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): "acc@1": 79.312, "acc@5": 94.526, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -495,6 +511,10 @@ class ResNeXt101_32X8D_Weights(WeightsEnum): "acc@1": 82.834, "acc@5": 96.228, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -509,10 +529,13 @@ class ResNeXt101_64X4D_Weights(WeightsEnum): "num_params": 83455272, "recipe": "https://github.com/pytorch/vision/pull/5935", "metrics": { - # Mock "acc@1": 83.246, "acc@5": 96.454, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -530,6 +553,7 @@ class Wide_ResNet50_2_Weights(WeightsEnum): "acc@1": 78.468, "acc@5": 94.086, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -543,6 +567,10 @@ class Wide_ResNet50_2_Weights(WeightsEnum): "acc@1": 81.602, "acc@5": 95.758, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V2 @@ -560,6 +588,7 @@ class Wide_ResNet101_2_Weights(WeightsEnum): "acc@1": 78.848, "acc@5": 94.284, }, + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", }, ) IMAGENET1K_V2 = Weights( @@ -573,6 +602,10 @@ class Wide_ResNet101_2_Weights(WeightsEnum): "acc@1": 82.510, "acc@5": 96.020, }, + "_docs": """ + These weights improve upon the results of the original paper by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V2 diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 3a314c867ca..4e163573655 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -186,12 +186,13 @@ def _shufflenetv2( _COMMON_META = { "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, - "recipe": "https://github.com/barrh/Shufflenet-v2-Pytorch/tree/v0.1.0", + "recipe": "https://github.com/ericsun99/Shufflenet-v2-Pytorch", } class ShuffleNet_V2_X0_5_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/ericsun99/Shufflenet-v2-Pytorch url="https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth", transforms=partial(ImageClassification, crop_size=224), meta={ @@ -201,6 +202,7 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): "acc@1": 60.552, "acc@5": 81.746, }, + "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -208,6 +210,7 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum): class ShuffleNet_V2_X1_0_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( + # Weights ported from https://github.com/ericsun99/Shufflenet-v2-Pytorch url="https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth", transforms=partial(ImageClassification, crop_size=224), meta={ @@ -217,6 +220,7 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum): "acc@1": 69.362, "acc@5": 88.316, }, + "_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -234,6 +238,10 @@ class ShuffleNet_V2_X1_5_Weights(WeightsEnum): "acc@1": 72.996, "acc@5": 91.086, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -251,6 +259,10 @@ class ShuffleNet_V2_X2_0_Weights(WeightsEnum): "acc@1": 76.230, "acc@5": 93.006, }, + "_docs": """ + These weights were trained from scratch by using TorchVision's `new training recipe + `_. 
+ """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/squeezenet.py b/torchvision/models/squeezenet.py index dd474cbe7f7..a93a06fc80d 100644 --- a/torchvision/models/squeezenet.py +++ b/torchvision/models/squeezenet.py @@ -117,6 +117,7 @@ def _squeezenet( _COMMON_META = { "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/pull/49#issuecomment-277560717", + "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""", } diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index 455397c8403..f60db5096a7 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -416,11 +416,12 @@ class Swin_T_Weights(WeightsEnum): **_COMMON_META, "num_params": 28288354, "min_size": (224, 224), - "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swin_t", + "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", "metrics": { "acc@1": 81.358, "acc@5": 95.526, }, + "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/vgg.py b/torchvision/models/vgg.py index 6b82f90d58b..9fb77d9bf97 100644 --- a/torchvision/models/vgg.py +++ b/torchvision/models/vgg.py @@ -110,6 +110,7 @@ def _vgg(cfg: str, batch_norm: bool, weights: Optional[WeightsEnum], progress: b "min_size": (32, 32), "categories": _IMAGENET_CATEGORIES, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg", + "_docs": """These weights were trained from scratch by using a simplified training recipe.""", } @@ -190,10 +191,8 @@ class VGG16_Weights(WeightsEnum): }, }, ) - # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the - # same input standardization method as the paper. Only the `features` weights have proper values, those on the - # `classifier` module are filled with nans. IMAGENET1K_FEATURES = Weights( + # Weights ported from https://github.com/amdegroot/ssd.pytorch/ url="https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth", transforms=partial( ImageClassification, @@ -210,6 +209,11 @@ class VGG16_Weights(WeightsEnum): "acc@1": float("nan"), "acc@5": float("nan"), }, + "_docs": """ + These weights can't be used for classification because they are missing values in the `classifier` + module. Only the `features` module has valid values and can be used for feature extraction. The weights + were trained using the original input standardization method as described in the paper. + """, }, ) DEFAULT = IMAGENET1K_V1 diff --git a/torchvision/models/vision_transformer.py b/torchvision/models/vision_transformer.py index e672721be65..dc54b8735f0 100644 --- a/torchvision/models/vision_transformer.py +++ b/torchvision/models/vision_transformer.py @@ -332,6 +332,10 @@ class ViT_B_16_Weights(WeightsEnum): "acc@1": 81.072, "acc@5": 95.318, }, + "_docs": """ + These weights were trained from scratch by using a modified version of `DeIT + `_'s training recipe. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -350,6 +354,10 @@ class ViT_B_16_Weights(WeightsEnum): "acc@1": 85.304, "acc@5": 97.650, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. 
+ """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -369,6 +377,10 @@ class ViT_B_16_Weights(WeightsEnum): "acc@1": 81.886, "acc@5": 96.180, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -387,6 +399,10 @@ class ViT_B_32_Weights(WeightsEnum): "acc@1": 75.912, "acc@5": 92.466, }, + "_docs": """ + These weights were trained from scratch by using a modified version of `DeIT + `_'s training recipe. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -405,6 +421,11 @@ class ViT_L_16_Weights(WeightsEnum): "acc@1": 79.662, "acc@5": 94.638, }, + "_docs": """ + These weights were trained from scratch by using a modified version of TorchVision's + `new training recipe + `_. + """, }, ) IMAGENET1K_SWAG_E2E_V1 = Weights( @@ -423,6 +444,10 @@ class ViT_L_16_Weights(WeightsEnum): "acc@1": 88.064, "acc@5": 98.512, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -442,6 +467,10 @@ class ViT_L_16_Weights(WeightsEnum): "acc@1": 85.146, "acc@5": 97.422, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -460,6 +489,10 @@ class ViT_L_32_Weights(WeightsEnum): "acc@1": 76.972, "acc@5": 93.07, }, + "_docs": """ + These weights were trained from scratch by using a modified version of `DeIT + `_'s training recipe. + """, }, ) DEFAULT = IMAGENET1K_V1 @@ -482,6 +515,10 @@ class ViT_H_14_Weights(WeightsEnum): "acc@1": 88.552, "acc@5": 98.694, }, + "_docs": """ + These weights are learnt via transfer learning by end-to-end fine-tuning the original + `SWAG `_ weights on ImageNet-1K data. + """, }, ) IMAGENET1K_SWAG_LINEAR_V1 = Weights( @@ -501,6 +538,10 @@ class ViT_H_14_Weights(WeightsEnum): "acc@1": 85.708, "acc@5": 97.730, }, + "_docs": """ + These weights are composed of the original frozen `SWAG `_ trunk + weights and a linear classifier learnt on top of them trained on ImageNet-1K data. + """, }, ) DEFAULT = IMAGENET1K_SWAG_E2E_V1