Skip to content

add HMDB51 and UCF101 datasets as well as prototype for new style video decoding #5422

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion .circleci/config.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .circleci/config.yml.in
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ jobs:
- install_torchvision
- install_prototype_dependencies
- pip_install:
args: scipy pycocotools h5py
args: scipy pycocotools h5py av rarfile
descr: Install optional dependencies
- run:
name: Enable prototype tests
Expand Down
4 changes: 4 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,7 @@ ignore_missing_imports = True
[mypy-h5py.*]

ignore_missing_imports = True

[mypy-rarfile.*]

ignore_missing_imports = True
97 changes: 87 additions & 10 deletions test/builtin_dataset_mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,18 @@
import pathlib
import pickle
import random
import unittest.mock
import xml.etree.ElementTree as ET
from collections import defaultdict, Counter

import numpy as np
import PIL.Image
import pytest
import torch
from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file
from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file, create_video_folder
from torch.nn.functional import one_hot
from torch.testing import make_tensor as _make_tensor
from torchvision.prototype.datasets._api import find
from torchvision.prototype.utils._internal import sequence_to_str

make_tensor = functools.partial(_make_tensor, device="cpu")
make_scalar = functools.partial(make_tensor, ())
Expand Down Expand Up @@ -67,14 +67,15 @@ def prepare(self, home, config):

mock_info = self._parse_mock_info(self.mock_data_fn(self.info, root, config))

available_file_names = {path.name for path in root.glob("*")}
required_file_names = {resource.file_name for resource in self.dataset.resources(config)}
missing_file_names = required_file_names - available_file_names
if missing_file_names:
raise pytest.UsageError(
f"Dataset '{self.name}' requires the files {sequence_to_str(sorted(missing_file_names))} "
f"for {config}, but they were not created by the mock data function."
)
for resource in self.dataset.resources(config):
with unittest.mock.patch(
"torchvision.prototype.datasets.utils._resource.OnlineResource.download",
side_effect=TypeError(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
side_effect=TypeError(
side_effect=pytest.UsageError(

f"Dataset '{self.name}' requires the file {resource.file_name} for {config}, "
f"but it was not created by the mock data function."
),
):
resource.load(root)

return mock_info

Expand Down Expand Up @@ -1344,3 +1345,79 @@ def pcam(info, root, config):
compressed_file.write(compressed_data)

return num_images


@register_mock
def ucf101(info, root, config):
    """Build mock UCF101 data below ``root`` and return the sample count for ``config``.

    The function populates ``UCF101/UCF-101`` through ``create_video_folder`` for three
    categories, writes ``classInd.txt`` plus the train / test lists of folds 1-3 into
    ``ucfTrainTestList``, and finally hands that folder to ``make_zip`` under the name
    ``UCF101TrainTestSplits-RecognitionTask.zip``.
    """
    videos_root = root / "UCF101" / "UCF-101"

    # (category, label) pairs; the labels are written verbatim into classInd.txt.
    label_by_category = (
        ("ApplyEyeMakeup", 0),
        ("LongJump", 50),
        ("YoYo", 100),
    )

    def file_name_fn(cls, idx, clips_per_group=2):
        # Consecutive indices fill up one group before moving to the next; both the
        # group and the clip number are 1-based and zero-padded to two digits.
        group, clip = divmod(idx, clips_per_group)
        return f"v_{cls}_g{group + 1:02d}_c{clip + 1:02d}.avi"

    per_category_files = []
    for category, _ in label_by_category:
        # Between 1 and 5 examples per category.
        num_examples = int(torch.randint(1, 6, ()))
        per_category_files.append(
            create_video_folder(
                videos_root, category, lambda idx: file_name_fn(category, idx), num_examples=num_examples
            )
        )

    lists_dir = root / "ucfTrainTestList"
    lists_dir.mkdir()

    class_index_lines = [f"{label} {category}" for category, label in label_by_category]
    with open(lists_dir / "classInd.txt", "w") as file:
        file.write("\n".join(class_index_lines) + "\n")

    # Videos are identified by their POSIX path relative to the video root.
    video_ids = [path.relative_to(videos_root).as_posix() for files in per_category_files for path in files]

    splits = ("train", "test")
    num_samples_map = {}
    for fold in range(1, 4):
        # Every fold gets its own random assignment of videos to splits.
        random.shuffle(video_ids)
        for offset, split in enumerate(splits):
            # Interleaved slicing distributes the shuffled ids over the splits.
            selected_ids = video_ids[offset :: len(splits)]
            with open(lists_dir / f"{split}list{fold:02d}.txt", "w") as file:
                file.write("\n".join(selected_ids) + "\n")

            num_samples_map[info.make_config(split=split, fold=str(fold))] = len(selected_ids)

    make_zip(root, "UCF101TrainTestSplits-RecognitionTask.zip", lists_dir)

    return num_samples_map[config]


@register_mock
def hmdb51(info, root, config):
    """Build mock HMDB51 data below ``root`` and return the sample count for ``config``.

    The function populates ``hmdb51_org`` through ``create_video_folder`` for three
    categories and writes one ``{category}_test_split{fold}.txt`` file per category and
    fold (1-3) into ``test_train_splits/testTrainMulti_7030_splits``. Each line of a split
    file holds a video file name followed by ``idx % 3``; ids 1 and 2 are counted as the
    train and test split respectively.
    """
    videos_root = root / "hmdb51_org"

    categories = ("brush_hair", "pour", "wave")

    files_by_category = {}
    for category in categories:
        # Between 3 and 9 examples per category.
        num_examples = int(torch.randint(3, 10, ()))
        files_by_category[category] = create_video_folder(
            videos_root, category, lambda idx: f"{category}_{idx}.avi", num_examples=num_examples
        )

    splits_dir = root / "test_train_splits" / "testTrainMulti_7030_splits"
    splits_dir.mkdir(parents=True)

    split_ids = {"train": 1, "test": 2}
    # Configurations that never receive a sample report a count of zero.
    num_samples_map = defaultdict(int)
    for category in categories:
        videos = files_by_category[category]
        # The split id of a video only depends on its position, not on the fold.
        split_lines = [f"{path.name} {idx % 3}" for idx, path in enumerate(videos)]

        for fold in range(1, 4):
            with open(splits_dir / f"{category}_test_split{fold}.txt", "w") as file:
                file.write("\n".join(split_lines) + "\n")

            for split, split_id in split_ids.items():
                # videos[split_id::3] are exactly the entries written with that split id.
                num_samples_map[info.make_config(split=split, fold=str(fold))] += len(videos[split_id::3])

    return num_samples_map[config]
88 changes: 88 additions & 0 deletions test/test_prototype_videoutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import math
import os

import pytest
import torch
from torchvision.io import _HAS_VIDEO_DECODER, _HAS_VIDEO_OPT, VideoReader
from torchvision.prototype.features import EncodedData
from torchvision.prototype.utils._internal import ReadOnlyTensorBuffer
from torchvision.prototype.datasets.utils._video import KeyframeDecoder, RandomFrameDecoder
try:
import av
except ImportError:
av = None

VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos")


@pytest.mark.skipif(av is None, reason="PyAV unavailable")
# The parametrization lives on the class so that *every* test method receives
# ``video_file``; on a single method only, the remaining tests would fail at
# collection time because no ``video_file`` fixture exists.
@pytest.mark.parametrize(
    "video_file",
    [
        "RATRACE_wave_f_nm_np1_fr_goo_37.avi",
        "TrumanShow_wave_f_nm_np1_fr_med_26.avi",
        "v_SoccerJuggling_g23_c01.avi",
        "v_SoccerJuggling_g24_c01.avi",
        "R6llTwEh07w.mp4",
        "SOX5yA1l24A.mp4",
        "WUzgd7C1pWA.mp4",
    ],
)
class TestVideoDatasetUtils:
    """Placeholder tests for the prototype video decoding utilities.

    All tests are skipped when PyAV cannot be imported and run once per video file
    listed in the class-level parametrization.
    """

    # TODO: atm we separate backends in order to allow for testing on different systems;
    #  once we have things packaged we should add this as test parametrisation
    #  (this also applies for GPU decoding as well)

    def test_random_decoder_av(self, video_file):
        """Read a sequence of random frames from a video
        Checks that files are valid video frames and no error is thrown during decoding.
        """
        video_file = os.path.join(VIDEO_DIR, video_file)
        video = ReadOnlyTensorBuffer(EncodedData.from_path(video_file))
        # NOTE(review): debugging placeholder; replace with real assertions once the
        # decoder API is settled.
        print(next(video))

    def test_random_decoder_cpu(self, video_file):
        """Read a sequence of random frames from a video using CPU backend
        Checks that files are valid video frames and no error is thrown during decoding,
        and compares them to `pyav` output.
        """
        pass

    def test_random_decoder_GPU(self, video_file):
        """Read a sequence of random frames from a video using GPU backend
        Checks that files are valid video frames and no error is thrown during decoding,
        and compares them to `pyav` output.
        """
        pass

    def test_keyframe_decoder_av(self, video_file):
        """Read all keyframes from a video;
        Compare the output to naive keyframe reading with `pyav`
        """
        pass

    def test_keyframe_decoder_cpu(self, video_file):
        """Read all keyframes from a video using CPU backend;
        ATM should raise a warning and default to `pyav`
        TODO: should we fail or default to a working backend
        """
        pass

    def test_keyframe_decoder_GPU(self, video_file):
        """Read all keyframes from a video using GPU backend;
        ATM should raise a warning and default to `pyav`
        TODO: should we fail or default to a working backend
        """
        pass

    def test_clip_decoder(self, video_file):
        """ATM very crude test:
        check only if fails, or if the clip sampling is correct,
        don't bother with the content just yet.
        """
        pass


if __name__ == "__main__":
pytest.main([__file__])
10 changes: 10 additions & 0 deletions torchvision/datasets/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import bz2
import contextlib
import gzip
import hashlib
import itertools
Expand Down Expand Up @@ -301,6 +302,15 @@ def _extract_zip(from_path: str, to_path: str, compression: Optional[str]) -> No
".tgz": (".tar", ".gz"),
}

# RAR support is optional: the extractor is only defined and registered when the
# third-party ``rarfile`` package can be imported.
try:
    import rarfile
except ImportError:
    pass
else:

    def _extract_rar(from_path: str, to_path: str, compression: Optional[str]) -> None:
        """Extract every member of the RAR archive at ``from_path`` into ``to_path``.

        ``compression`` is accepted but not used by this extractor.
        """
        with rarfile.RarFile(from_path, "r") as archive:
            archive.extractall(to_path)

    _ARCHIVE_EXTRACTORS[".rar"] = _extract_rar


def _detect_file_type(file: str) -> Tuple[str, Optional[str], Optional[str]]:
"""Detect the archive type and/or compression of a file.
Expand Down
2 changes: 2 additions & 0 deletions torchvision/prototype/datasets/_builtin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
from .dtd import DTD
from .fer2013 import FER2013
from .gtsrb import GTSRB
from .hmdb51 import HMDB51
from .imagenet import ImageNet
from .mnist import MNIST, FashionMNIST, KMNIST, EMNIST, QMNIST
from .oxford_iiit_pet import OxfordIITPet
from .pcam import PCAM
from .sbd import SBD
from .semeion import SEMEION
from .svhn import SVHN
from .ucf101 import UCF101
from .voc import VOC
51 changes: 51 additions & 0 deletions torchvision/prototype/datasets/_builtin/hmdb51.categories
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
brush_hair
cartwheel
catch
chew
clap
climb
climb_stairs
dive
draw_sword
dribble
drink
eat
fall_floor
fencing
flic_flac
golf
handstand
hit
hug
jump
kick
kick_ball
kiss
laugh
pick
pour
pullup
punch
push
pushup
ride_bike
ride_horse
run
shake_hands
shoot_ball
shoot_bow
shoot_gun
sit
situp
smile
smoke
somersault
stand
swing_baseball
sword
sword_exercise
talk
throw
turn
walk
wave
Loading