pytorch · pmeier · Feb 2, 2022 · Feb 2, 2022 · Feb 2, 2022 · Feb 7, 2022
diff --git a/.circleci/config.yml b/.circleci/config.yml
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
@@ -351,7 +351,7 @@ jobs:
       - install_torchvision
       - install_prototype_dependencies
       - pip_install:
-          args: scipy pycocotools h5py
+          args: scipy pycocotools h5py av rarfile
           descr: Install optional dependencies
       - run:
           name: Enable prototype tests

diff --git a/mypy.ini b/mypy.ini
@@ -155,3 +155,7 @@ ignore_missing_imports = True
 [mypy-h5py.*]
 
 ignore_missing_imports = True
+
+[mypy-rarfile.*]
+
+ignore_missing_imports = True
diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py
@@ -9,18 +9,18 @@
 import pathlib
 import pickle
 import random
+import unittest.mock
 import xml.etree.ElementTree as ET
 from collections import defaultdict, Counter
 
 import numpy as np
 import PIL.Image
 import pytest
 import torch
-from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file
+from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file, create_video_folder
 from torch.nn.functional import one_hot
 from torch.testing import make_tensor as _make_tensor
 from torchvision.prototype.datasets._api import find
-from torchvision.prototype.utils._internal import sequence_to_str
 
 make_tensor = functools.partial(_make_tensor, device="cpu")
 make_scalar = functools.partial(make_tensor, ())
@@ -67,14 +67,15 @@ def prepare(self, home, config):
 
         mock_info = self._parse_mock_info(self.mock_data_fn(self.info, root, config))
 
-        available_file_names = {path.name for path in root.glob("*")}
-        required_file_names = {resource.file_name for resource in self.dataset.resources(config)}
-        missing_file_names = required_file_names - available_file_names
-        if missing_file_names:
-            raise pytest.UsageError(
-                f"Dataset '{self.name}' requires the files {sequence_to_str(sorted(missing_file_names))} "
-                f"for {config}, but they were not created by the mock data function."
-            )
+        for resource in self.dataset.resources(config):
+            with unittest.mock.patch(
+                "torchvision.prototype.datasets.utils._resource.OnlineResource.download",
+                side_effect=pytest.UsageError(
+                    f"Dataset '{self.name}' requires the file {resource.file_name} for {config}, "
+                    f"but it was not created by the mock data function."
+                ),
+            ):
+                resource.load(root)
 
         return mock_info
 
@@ -1344,3 +1345,79 @@ def pcam(info, root, config):
             compressed_file.write(compressed_data)
 
     return num_images
+
+
+@register_mock
+def ucf101(info, root, config):
+    video_folder = root / "UCF101" / "UCF-101"
+
+    categories_and_labels = [
+        ("ApplyEyeMakeup", 0),
+        ("LongJump", 50),
+        ("YoYo", 100),
+    ]
+
+    def file_name_fn(cls, idx, clips_per_group=2):
+        return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi"
+
+    video_files = [
+        create_video_folder(
+            video_folder, category, lambda idx: file_name_fn(category, idx), num_examples=int(torch.randint(1, 6, ()))
+        )
+        for category, _ in categories_and_labels
+    ]
+
+    splits_folder = root / "ucfTrainTestList"
+    splits_folder.mkdir()
+
+    with open(splits_folder / "classInd.txt", "w") as file:
+        file.write("\n".join(f"{label} {category}" for category, label in categories_and_labels) + "\n")
+
+    video_ids = [path.relative_to(video_folder).as_posix() for path in itertools.chain.from_iterable(video_files)]
+    splits = ("train", "test")
+    num_samples_map = {}
+    for fold in range(1, 4):
+        random.shuffle(video_ids)
+        for offset, split in enumerate(splits):
+            video_ids_in_config = video_ids[offset :: len(splits)]
+            with open(splits_folder / f"{split}list{fold:02d}.txt", "w") as file:
+                file.write("\n".join(video_ids_in_config) + "\n")
+
+            num_samples_map[info.make_config(split=split, fold=str(fold))] = len(video_ids_in_config)
+
+    make_zip(root, "UCF101TrainTestSplits-RecognitionTask.zip", splits_folder)
+
+    return num_samples_map[config]
+
+
+@register_mock
+def hmdb51(info, root, config):
+    video_folder = root / "hmdb51_org"
+
+    categories = [
+        "brush_hair",
+        "pour",
+        "wave",
+    ]
+
+    video_files = {
+        category: create_video_folder(
+            video_folder, category, lambda idx: f"{category}_{idx}.avi", num_examples=int(torch.randint(3, 10, ()))
+        )
+        for category in categories
+    }
+
+    splits_folder = root / "test_train_splits" / "testTrainMulti_7030_splits"
+    splits_folder.mkdir(parents=True)
+
+    num_samples_map = defaultdict(lambda: 0)
+    for category, fold in itertools.product(categories, range(1, 4)):
+        videos = video_files[category]
+
+        with open(splits_folder / f"{category}_test_split{fold}.txt", "w") as file:
+            file.write("\n".join(f"{path.name} {idx % 3}" for idx, path in enumerate(videos)) + "\n")
+
+        for split, split_id in (("train", 1), ("test", 2)):
+            num_samples_map[info.make_config(split=split, fold=str(fold))] += len(videos[split_id::3])
+
+    return num_samples_map[config]
diff --git a/test/test_prototype_videoutils.py b/test/test_prototype_videoutils.py
@@ -0,0 +1,88 @@
+import math
+import os
+
+import pytest
+import torch
+from torchvision.io import _HAS_VIDEO_DECODER, _HAS_VIDEO_OPT, VideoReader
+from torchvision.prototype.features import EncodedData
+from torchvision.prototype.utils._internal import ReadOnlyTensorBuffer
+from torchvision.prototype.datasets.utils._video import KeyframeDecoder, RandomFrameDecoder
+try:
+    import av
+except ImportError:
+    av = None
+
+VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos")
+
+
+@pytest.mark.skipif(av is None, reason="PyAV unavailable")
+class TestVideoDatasetUtils:
+    # TODO: atm we separate backends in order to allow for testing on different systems;
+    # once we have things packaged we should add this as test parametrisation
+    # (this also applies for GPU decoding as well)
+
+    @pytest.mark.parametrize(
+        "video_file",
+        [
+            "RATRACE_wave_f_nm_np1_fr_goo_37.avi",
+            "TrumanShow_wave_f_nm_np1_fr_med_26.avi",
+            "v_SoccerJuggling_g23_c01.avi",
+            "v_SoccerJuggling_g24_c01.avi",
+            "R6llTwEh07w.mp4",
+            "SOX5yA1l24A.mp4",
+            "WUzgd7C1pWA.mp4",
+        ],
+    )
+    def test_random_decoder_av(self, video_file):
+        """Read a sequence of random frames from a video
+        Checks that files are valid video frames and no error is thrown during decoding.
+        """
+        video_file = os.path.join(VIDEO_DIR, video_file)
+        video = ReadOnlyTensorBuffer(EncodedData.from_path(video_file))
+        print(next(video))
+        pass
+
+    def test_random_decoder_cpu(self, video_file):
+        """Read a sequence of random frames from a video using CPU backend
+        Checks that files are valid video frames and no error is thrown during decoding,
+        and compares them to `pyav` output.
+        """
+        pass
+
+    def test_random_decoder_GPU(self, video_file):
+        """Read a sequence of random frames from a video using GPU backend
+        Checks that files are valid video frames and no error is thrown during decoding,
+        and compares them to `pyav` output.
+        """
+        pass
+
+    def test_keyframe_decoder_av(self, video_file):
+        """Read all keyframes from a video;
+        Compare the output to naive keyframe reading with `pyav`
+        """
+        pass
+
+    def test_keyframe_decoder_cpu(self, video_file):
+        """Read all keyframes from a video using CPU backend;
+        ATM should raise a warning and default to `pyav`
+        TODO: should we fail or default to a working backend
+        """
+        pass
+
+    def test_keyframe_decoder_GPU(self, video_file):
+        """Read all keyframes from a video using GPU backend;
+        ATM should raise a warning and default to `pyav`
+        TODO: should we fail or default to a working backend
+        """
+        pass
+
+    def test_clip_decoder(self, video_file):
+        """ATM very crude test:
+        check only if fails, or if the clip sampling is correct,
+        don't bother with the content just yet.
+        """
+        pass
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
@@ -1,4 +1,5 @@
 import bz2
+import contextlib
 import gzip
 import hashlib
 import itertools
@@ -301,6 +302,15 @@ def _extract_zip(from_path: str, to_path: str, compression: Optional[str]) -> No
     ".tgz": (".tar", ".gz"),
 }
 
+with contextlib.suppress(ImportError):
+    import rarfile
+
+    def _extract_rar(from_path: str, to_path: str, compression: Optional[str]) -> None:
+        with rarfile.RarFile(from_path, "r") as rar:
+            rar.extractall(to_path)
+
+    _ARCHIVE_EXTRACTORS[".rar"] = _extract_rar
+
 
 def _detect_file_type(file: str) -> Tuple[str, Optional[str], Optional[str]]:
     """Detect the archive type and/or compression of a file.

diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py
@@ -7,11 +7,13 @@
 from .dtd import DTD
 from .fer2013 import FER2013
 from .gtsrb import GTSRB
+from .hmdb51 import HMDB51
 from .imagenet import ImageNet
 from .mnist import MNIST, FashionMNIST, KMNIST, EMNIST, QMNIST
 from .oxford_iiit_pet import OxfordIITPet
 from .pcam import PCAM
 from .sbd import SBD
 from .semeion import SEMEION
 from .svhn import SVHN
+from .ucf101 import UCF101
 from .voc import VOC
diff --git a/torchvision/prototype/datasets/_builtin/hmdb51.categories b/torchvision/prototype/datasets/_builtin/hmdb51.categories
@@ -0,0 +1,51 @@
+brush_hair
+cartwheel
+catch
+chew
+clap
+climb
+climb_stairs
+dive
+draw_sword
+dribble
+drink
+eat
+fall_floor
+fencing
+flic_flac
+golf
+handstand
+hit
+hug
+jump
+kick
+kick_ball
+kiss
+laugh
+pick
+pour
+pullup
+punch
+push
+pushup
+ride_bike
+ride_horse
+run
+shake_hands
+shoot_ball
+shoot_bow
+shoot_gun
+sit
+situp
+smile
+smoke
+somersault
+stand
+swing_baseball
+sword
+sword_exercise
+talk
+throw
+turn
+walk
+wave