
Document all remaining pre-trained weights #6039


Merged · 8 commits · May 18, 2022
4 changes: 1 addition & 3 deletions docs/source/conf.py
@@ -345,9 +345,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
         metrics = meta.pop("metrics", {})
         meta_with_metrics = dict(meta, **metrics)

-        custom_docs = meta_with_metrics.pop("_docs", None)  # Custom per-Weights docs
-        if custom_docs is not None:
-            lines += [custom_docs]
+        lines += [meta_with_metrics.pop("_docs")]

         if field == obj.DEFAULT:
             lines += [f"Also available as ``{obj.__name__}.DEFAULT``."]
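With this change `_docs` becomes mandatory: the handler pops the key unconditionally, so a weights entry without it now breaks the docs build instead of being silently skipped. A minimal sketch of the resulting behavior (simplified; `weights` here is a stand-in for one enum member, not the handler's real arguments):

# Sketch under the assumption of a single WeightsEnum member `weights`.
meta = dict(weights.meta)
metrics = meta.pop("metrics", {})
meta_with_metrics = dict(meta, **metrics)
lines = [meta_with_metrics.pop("_docs")]  # KeyError -> docs-build failure if "_docs" is missing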
4 changes: 2 additions & 2 deletions test/test_extended_models.py
@@ -95,8 +95,8 @@ def test_schema_meta_validation(model_fn):
     # mandatory fields for each computer vision task
     classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
     defaults = {
-        "all": {"metrics", "min_size", "num_params", "recipe"},
-        "models": classification_fields | {"_docs"},
+        "all": {"metrics", "min_size", "num_params", "recipe", "_docs"},
+        "models": classification_fields,
         "detection": {"categories", ("metrics", "box_map")},
         "quantization": classification_fields | {"backend", "unquantized"},
         "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},
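Moving `_docs` from the classification-only `"models"` bucket into `"all"` makes it mandatory for every task type. A rough, hypothetical sketch of the check this enables (the real test also flattens nested `("metrics", ...)` keys before comparing):

# Illustrative values only; num_params and recipe are placeholders, not asserted facts.
required = {"metrics", "min_size", "num_params", "recipe", "_docs"}
meta = {
    "metrics": {"box_map": 37.0},
    "min_size": (1, 1),
    "num_params": 0,
    "recipe": "https://github.com/pytorch/vision/tree/main/references/detection",
}
missing = required - meta.keys()
assert not missing, f"Missing mandatory fields: {missing}"  # fails here: {'_docs'}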
6 changes: 5 additions & 1 deletion torchvision/models/detection/faster_rcnn.py
@@ -386,6 +386,7 @@ class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 37.0,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -402,6 +403,7 @@ class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 46.7,
             },
+            "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
         },
     )
     DEFAULT = COCO_V1
@@ -418,6 +420,7 @@ class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 32.8,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -434,6 +437,7 @@ class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 22.8,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -454,7 +458,7 @@ def fasterrcnn_resnet50_fpn(
 ) -> FasterRCNN:
     """
     Faster R-CNN model with a ResNet-50-FPN backbone from the `Faster R-CNN: Towards Real-Time Object
-    Detection with Region Proposal Networks <https://arxiv.org/abs/1703.06870>`__
+    Detection with Region Proposal Networks <https://arxiv.org/abs/1506.01497>`__
     paper.

     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
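Once merged, the blurb is reachable at runtime through the weight's meta dictionary (values below are the ones from this diff):

from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

w = FasterRCNN_ResNet50_FPN_Weights.COCO_V1
print(w.meta["metrics"]["box_map"])  # 37.0
print(w.meta["_docs"])               # the training-recipe note added above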
1 change: 1 addition & 0 deletions torchvision/models/detection/fcos.py
@@ -661,6 +661,7 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 39.2,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
5 changes: 5 additions & 0 deletions torchvision/models/detection/keypoint_rcnn.py
@@ -326,6 +326,10 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
                 "box_map": 50.6,
                 "kp_map": 61.1,
             },
+            "_docs": """
+                These weights were produced by following a similar training recipe as on the paper but use a checkpoint
+                from an early epoch.
+            """,
         },
     )
     COCO_V1 = Weights(
@@ -339,6 +343,7 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
                 "box_map": 54.6,
                 "kp_map": 65.0,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
2 changes: 2 additions & 0 deletions torchvision/models/detection/mask_rcnn.py
@@ -368,6 +368,7 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
                 "box_map": 37.9,
                 "mask_map": 34.6,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -385,6 +386,7 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
                 "box_map": 47.4,
                 "mask_map": 41.8,
             },
+            "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
         },
     )
     DEFAULT = COCO_V1
2 changes: 2 additions & 0 deletions torchvision/models/detection/retinanet.py
@@ -690,6 +690,7 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 36.4,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -706,6 +707,7 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 41.5,
             },
+            "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
         },
     )
     DEFAULT = COCO_V1
1 change: 1 addition & 0 deletions torchvision/models/detection/ssd.py
@@ -37,6 +37,7 @@ class SSD300_VGG16_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 25.1,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
1 change: 1 addition & 0 deletions torchvision/models/detection/ssdlite.py
@@ -196,6 +196,7 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 21.3,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
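The same strings also surface through the string-based lookup the docs pipeline relies on; a small usage sketch (assuming `get_weight` is exposed under torchvision.models, as on the 0.13 API):

from torchvision.models import get_weight

w = get_weight("SSDLite320_MobileNet_V3_Large_Weights.COCO_V1")
print(w.meta["metrics"]["box_map"])  # 21.3
print(w.meta["_docs"])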
35 changes: 22 additions & 13 deletions torchvision/models/optical_flow/raft.py
@@ -518,7 +518,7 @@ def forward(self, image1, image2, num_flow_updates: int = 12):

 class Raft_Large_Weights(WeightsEnum):
     C_T_V1 = Weights(
-        # Chairs + Things, ported from original paper repo (raft-things.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_large_C_T_V1-22a6c225.pth",
         transforms=OpticalFlow,
         meta={
@@ -531,11 +531,11 @@ class Raft_Large_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 5.0172,
                 "kitti_train_fl_all": 17.4506,
             },
+            "_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""",
         },
     )

     C_T_V2 = Weights(
-        # Chairs + Things
         url="https://download.pytorch.org/models/raft_large_C_T_V2-1bb1363a.pth",
         transforms=OpticalFlow,
         meta={
@@ -548,11 +548,12 @@ class Raft_Large_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 4.5118,
                 "kitti_train_fl_all": 16.0679,
             },
+            "_docs": """These weights were trained from scratch on Chairs + Things.""",
         },
     )

     C_T_SKHT_V1 = Weights(
-        # Chairs + Things + Sintel fine-tuning, ported from original paper repo (raft-sintel.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V1-0b8c9e55.pth",
         transforms=OpticalFlow,
         meta={
@@ -563,13 +564,14 @@ class Raft_Large_Weights(WeightsEnum):
                 "sintel_test_cleanpass_epe": 1.94,
                 "sintel_test_finalpass_epe": 3.18,
             },
+            "_docs": """
+                These weights were ported from the original paper. They are trained on Chairs + Things and fine-tuned on
+                Sintel (C+T+S+K+H).
+            """,
         },
     )

     C_T_SKHT_V2 = Weights(
-        # Chairs + Things + Sintel fine-tuning, i.e.:
-        # Chairs + Things + (Sintel + Kitti + HD1K + Things_clean)
-        # Corresponds to the C+T+S+K+H on paper with fine-tuning on Sintel
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V2-ff5fadd5.pth",
         transforms=OpticalFlow,
         meta={
@@ -580,11 +582,14 @@ class Raft_Large_Weights(WeightsEnum):
                 "sintel_test_cleanpass_epe": 1.819,
                 "sintel_test_finalpass_epe": 3.067,
             },
+            "_docs": """
+                These weights were trained from scratch on Chairs + Things and fine-tuned on Sintel (C+T+S+K+H).
+            """,
         },
     )

     C_T_SKHT_K_V1 = Weights(
-        # Chairs + Things + Sintel fine-tuning + Kitti fine-tuning, ported from the original repo (sintel-kitti.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V1-4a6a5039.pth",
         transforms=OpticalFlow,
         meta={
@@ -594,14 +599,14 @@ class Raft_Large_Weights(WeightsEnum):
             "metrics": {
                 "kitti_test_fl_all": 5.10,
             },
+            "_docs": """
+                These weights were ported from the original paper. They are trained on Chairs + Things, fine-tuned on
+                Sintel and then on Kitti.
+            """,
         },
     )

     C_T_SKHT_K_V2 = Weights(
-        # Chairs + Things + Sintel fine-tuning + Kitti fine-tuning i.e.:
-        # Chairs + Things + (Sintel + Kitti + HD1K + Things_clean) + Kitti
-        # Same as CT_SKHT with extra fine-tuning on Kitti
-        # Corresponds to the C+T+S+K+H on paper with fine-tuning on Sintel and then on Kitti
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V2-b5c70766.pth",
         transforms=OpticalFlow,
         meta={
@@ -611,6 +616,9 @@ class Raft_Large_Weights(WeightsEnum):
             "metrics": {
                 "kitti_test_fl_all": 5.19,
             },
+            "_docs": """
+                These weights were trained from scratch on Chairs + Things, fine-tuned on Sintel and then on Kitti.
+            """,
         },
     )

@@ -619,7 +627,7 @@ class Raft_Large_Weights(WeightsEnum):

 class Raft_Small_Weights(WeightsEnum):
     C_T_V1 = Weights(
-        # Chairs + Things, ported from original paper repo (raft-small.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_small_C_T_V1-ad48884c.pth",
         transforms=OpticalFlow,
         meta={
@@ -632,10 +640,10 @@ class Raft_Small_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 7.6557,
                 "kitti_train_fl_all": 25.2801,
             },
+            "_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""",
         },
     )
     C_T_V2 = Weights(
-        # Chairs + Things
         url="https://download.pytorch.org/models/raft_small_C_T_V2-01064c6d.pth",
         transforms=OpticalFlow,
         meta={
@@ -648,6 +656,7 @@ class Raft_Small_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 7.5978,
                 "kitti_train_fl_all": 25.2369,
             },
+            "_docs": """These weights were trained from scratch on Chairs + Things.""",
         },
     )
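The new `_docs` strings preserve the ported-vs-retrained distinction that previously lived only in comments. One way to eyeball them all (whitespace is collapsed because the multi-line strings keep their source indentation):

from torchvision.models.optical_flow import Raft_Large_Weights

for w in Raft_Large_Weights:  # enum iteration skips the DEFAULT alias
    blurb = " ".join(w.meta["_docs"].split())
    print(f"{w.name}: {blurb}")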
4 changes: 4 additions & 0 deletions torchvision/models/quantization/googlenet.py
@@ -121,6 +121,10 @@ class GoogLeNet_QuantizedWeights(WeightsEnum):
                 "acc@1": 69.826,
                 "acc@5": 89.404,
             },
+            "_docs": """
+                These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V1
4 changes: 4 additions & 0 deletions torchvision/models/quantization/inception.py
@@ -187,6 +187,10 @@ class Inception_V3_QuantizedWeights(WeightsEnum):
                 "acc@1": 77.176,
                 "acc@5": 93.354,
             },
+            "_docs": """
+                These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V1
4 changes: 4 additions & 0 deletions torchvision/models/quantization/mobilenetv2.py
@@ -79,6 +79,10 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum):
                 "acc@1": 71.658,
                 "acc@5": 90.150,
             },
+            "_docs": """
+                These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_QNNPACK_V1
4 changes: 4 additions & 0 deletions torchvision/models/quantization/mobilenetv3.py
@@ -173,6 +173,10 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum):
                 "acc@1": 73.004,
                 "acc@5": 90.858,
             },
+            "_docs": """
+                These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_QNNPACK_V1
4 changes: 4 additions & 0 deletions torchvision/models/quantization/resnet.py
@@ -154,6 +154,10 @@ def _resnet(
     "categories": _IMAGENET_CATEGORIES,
     "backend": "fbgemm",
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models",
+    "_docs": """
+        These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+        weights listed below.
+    """,
 }


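For the quantized models the note lands next to the `backend` and `unquantized` fields that the test schema above already requires; e.g. (enum name as in torchvision 0.13):

from torchvision.models.quantization import ResNet50_QuantizedWeights

w = ResNet50_QuantizedWeights.IMAGENET1K_FBGEMM_V1
print(w.meta["backend"])      # "fbgemm"
print(w.meta["unquantized"])  # the float weights these were quantized from
print(w.meta["_docs"])        # the shared post-training-quantization note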
4 changes: 4 additions & 0 deletions torchvision/models/quantization/shufflenetv2.py
@@ -118,6 +118,10 @@ def _shufflenetv2(
     "categories": _IMAGENET_CATEGORIES,
     "backend": "fbgemm",
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models",
+    "_docs": """
+        These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+        weights listed below.
+    """,
 }


4 changes: 4 additions & 0 deletions torchvision/models/segmentation/deeplabv3.py
@@ -131,6 +131,10 @@ def _deeplabv3_resnet(
 _COMMON_META = {
     "categories": _VOC_CATEGORIES,
     "min_size": (1, 1),
+    "_docs": """
+        These weights were trained on a subset of COCO, using only the 20 categories that are present in the Pascal VOC
+        dataset.
+    """,
 }


4 changes: 4 additions & 0 deletions torchvision/models/segmentation/fcn.py
@@ -50,6 +50,10 @@ def __init__(self, in_channels: int, channels: int) -> None:
 _COMMON_META = {
     "categories": _VOC_CATEGORIES,
     "min_size": (1, 1),
+    "_docs": """
+        These weights were trained on a subset of COCO, using only the 20 categories that are present in the Pascal VOC
+        dataset.
+    """,
 }


4 changes: 4 additions & 0 deletions torchvision/models/segmentation/lraspp.py
@@ -106,6 +106,10 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum):
                 "miou": 57.9,
                 "pixel_acc": 91.2,
             },
+            "_docs": """
+                These weights were trained on a subset of COCO, using only the 20 categories that are present in the
+                Pascal VOC dataset.
+            """,
         },
     )
     DEFAULT = COCO_WITH_VOC_LABELS_V1
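All three segmentation families share the same COCO-subset note, so each weight's docs spell out the VOC-categories caveat; at runtime:

from torchvision.models.segmentation import LRASPP_MobileNet_V3_Large_Weights

w = LRASPP_MobileNet_V3_Large_Weights.COCO_WITH_VOC_LABELS_V1
print(w.meta["metrics"]["miou"])  # 57.9
print(w.meta["_docs"])            # the COCO-subset / VOC-categories note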
1 change: 1 addition & 0 deletions torchvision/models/video/resnet.py
@@ -312,6 +312,7 @@ def _video_resnet(
     "min_size": (1, 1),
     "categories": _KINETICS400_CATEGORIES,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/video_classification",
+    "_docs": """These weights reproduce closely the accuracy of the paper for 16-frame clip inputs.""",
 }


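The shared video note propagates to every video ResNet variant, e.g. (enum name as in torchvision 0.13):

from torchvision.models.video import R3D_18_Weights

w = R3D_18_Weights.KINETICS400_V1
print(w.meta["_docs"])  # the 16-frame clip accuracy note from _COMMON_META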