diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index aa3e3f61be3..5789c8620dc 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -496,14 +496,44 @@ def new(fp, *args, **kwargs):
 class VideoDatasetTestCase(DatasetTestCase):
     """Abstract base class for video dataset testcases.
 
-    - Overwrites the FEATURE_TYPES class attribute to expect two :class:`torch.Tensor` s for the video and audio as
+    - Overwrites the 'FEATURE_TYPES' class attribute to expect two :class:`torch.Tensor` s for the video and audio as
       well as an integer label.
-    - Overwrites the REQUIRED_PACKAGES class attribute to require PyAV (``av``).
+    - Overwrites the 'REQUIRED_PACKAGES' class attribute to require PyAV (``av``).
+    - Adds the 'DEFAULT_FRAMES_PER_CLIP' class attribute. If no 'frames_per_clip' is provided by 'inject_fake_data()'
+      and it is the last parameter without a default value in the dataset constructor, the value of the
+      'DEFAULT_FRAMES_PER_CLIP' class attribute is appended to the output.
     """
 
     FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
     REQUIRED_PACKAGES = ("av",)
+    DEFAULT_FRAMES_PER_CLIP = 1
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.inject_fake_data = self._set_default_frames_per_clip(self.inject_fake_data)
+
+    def _set_default_frames_per_clip(self, inject_fake_data):
+        argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
+        args_without_default = argspec.args[1:-len(argspec.defaults)]
+        frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
+        only_root_and_frames_per_clip = (len(args_without_default) == 2) and frames_per_clip_last
+
+        @functools.wraps(inject_fake_data)
+        def wrapper(tmpdir, config):
+            output = inject_fake_data(tmpdir, config)
+            if isinstance(output, collections.abc.Sequence) and len(output) == 2:
+                args, info = output
+                if frames_per_clip_last and len(args) == len(args_without_default) - 1:
+                    args = (*args, self.DEFAULT_FRAMES_PER_CLIP)
+
+                return args, info
+            elif isinstance(output, (int, dict)) and only_root_and_frames_per_clip:
+                return (tmpdir, self.DEFAULT_FRAMES_PER_CLIP), output
+            else:
+                return output
+
+        return wrapper
+
 
 def create_image_or_video_tensor(size: Sequence[int]) -> torch.Tensor:
     r"""Create a random uint8 tensor.
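As a hedged, standalone sketch of the argument inspection performed by _set_default_frames_per_clip above (the constructor signature below is simplified and hypothetical, not torchvision's exact one): "frames_per_clip" is the last parameter without a default, so the wrapper may append DEFAULT_FRAMES_PER_CLIP when inject_fake_data() does not supply it.

    import inspect


    # Simplified stand-in for a video dataset constructor such as UCF101.__init__;
    # only the shape of the signature matters for this illustration.
    def fake_video_dataset_init(self, root, annotation_path, frames_per_clip, step_between_clips=1, train=True):
        pass


    argspec = inspect.getfullargspec(fake_video_dataset_init)
    # Drop `self` and the parameters that have defaults.
    args_without_default = argspec.args[1:-len(argspec.defaults)]
    print(args_without_default)                            # ['root', 'annotation_path', 'frames_per_clip']
    # Because 'frames_per_clip' comes last, the wrapper can append DEFAULT_FRAMES_PER_CLIP
    # whenever inject_fake_data() returns one positional argument fewer than this list.
    print(args_without_default[-1] == "frames_per_clip")   # True
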
diff --git a/test/fakedata_generation.py b/test/fakedata_generation.py
index cdd6683b22b..dac415df110 100644
--- a/test/fakedata_generation.py
+++ b/test/fakedata_generation.py
@@ -369,50 +369,6 @@ def _make_mat(file):
     yield root
 
 
-@contextlib.contextmanager
-def ucf101_root():
-    with get_tmp_dir() as tmp_dir:
-        ucf_dir = os.path.join(tmp_dir, 'UCF-101')
-        video_dir = os.path.join(ucf_dir, 'video')
-        annotations = os.path.join(ucf_dir, 'annotations')
-
-        os.makedirs(ucf_dir)
-        os.makedirs(video_dir)
-        os.makedirs(annotations)
-
-        fold_files = []
-        for split in {'train', 'test'}:
-            for fold in range(1, 4):
-                fold_file = '{:s}list{:02d}.txt'.format(split, fold)
-                fold_files.append(os.path.join(annotations, fold_file))
-
-        file_handles = [open(x, 'w') for x in fold_files]
-        file_iter = cycle(file_handles)
-
-        for i in range(0, 2):
-            current_class = 'class_{0}'.format(i + 1)
-            class_dir = os.path.join(video_dir, current_class)
-            os.makedirs(class_dir)
-            for group in range(0, 3):
-                for clip in range(0, 4):
-                    # Save sample file
-                    clip_name = 'v_{0}_g{1}_c{2}.avi'.format(
-                        current_class, group, clip)
-                    clip_path = os.path.join(class_dir, clip_name)
-                    length = random.randrange(10, 21)
-                    this_clip = torch.randint(
-                        0, 256, (length * 25, 320, 240, 3), dtype=torch.uint8)
-                    write_video(clip_path, this_clip, 25)
-                    # Add to annotations
-                    ann_file = next(file_iter)
-                    ann_file.write('{0}\n'.format(
-                        os.path.join(current_class, clip_name)))
-        # Close all file descriptors
-        for f in file_handles:
-            f.close()
-        yield (video_dir, annotations)
-
-
 @contextlib.contextmanager
 def places365_root(split="train-standard", small=False):
     VARIANTS = {
diff --git a/test/test_datasets.py b/test/test_datasets.py
index 265aa9b80dc..37651ae7614 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -11,7 +11,7 @@
 from torchvision.datasets import utils
 from common_utils import get_tmp_dir
 from fakedata_generation import mnist_root, cifar_root, imagenet_root, \
-    cityscapes_root, svhn_root, ucf101_root, places365_root, widerface_root, stl10_root
+    cityscapes_root, svhn_root, places365_root, widerface_root, stl10_root
 import xml.etree.ElementTree as ET
 from urllib.request import Request, urlopen
 import itertools
@@ -22,6 +22,7 @@
 import torch
 import shutil
 import json
+import random
 
 
 try:
@@ -261,29 +262,6 @@ def test_svhn(self, mock_check):
             dataset = torchvision.datasets.SVHN(root, split="extra")
             self.generic_classification_dataset_test(dataset, num_images=2)
 
-    @unittest.skipIf(not HAS_PYAV, "PyAV unavailable")
-    def test_ucf101(self):
-        cached_meta_data = None
-        with ucf101_root() as (root, ann_root):
-            for split in {True, False}:
-                for fold in range(1, 4):
-                    for length in {10, 15, 20}:
-                        dataset = torchvision.datasets.UCF101(root, ann_root, length, fold=fold, train=split,
-                                                              num_workers=2, _precomputed_metadata=cached_meta_data)
-                        if cached_meta_data is None:
-                            cached_meta_data = dataset.metadata
-                        self.assertGreater(len(dataset), 0)
-
-                        video, audio, label = dataset[0]
-                        self.assertEqual(video.size(), (length, 320, 240, 3))
-                        self.assertEqual(audio.numel(), 0)
-                        self.assertEqual(label, 0)
-
-                        video, audio, label = dataset[len(dataset) - 1]
-                        self.assertEqual(video.size(), (length, 320, 240, 3))
-                        self.assertEqual(audio.numel(), 0)
-                        self.assertEqual(label, 1)
-
     def test_places365(self):
         for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)):
             with places365_root(split=split, small=small) as places365:
@@ -905,5 +883,56 @@ def test_captions(self):
         self.assertEqual(tuple(captions), tuple(info["captions"]))
 
 
+class UCF101TestCase(datasets_utils.VideoDatasetTestCase):
+    DATASET_CLASS = datasets.UCF101
+
+    CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False))
+
+    def inject_fake_data(self, tmpdir, config):
+        tmpdir = pathlib.Path(tmpdir)
+
+        video_folder = tmpdir / "videos"
+        os.makedirs(video_folder)
+        video_files = self._create_videos(video_folder)
+
+        annotations_folder = tmpdir / "annotations"
+        os.makedirs(annotations_folder)
+        num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"])
+
+        return (str(video_folder), str(annotations_folder)), num_examples
+
+    def _create_videos(self, root, num_examples_per_class=3):
+        def file_name_fn(cls, idx, clips_per_group=2):
+            return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi"
+
+        video_files = [
+            datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class)
+            for cls in ("ApplyEyeMakeup", "YoYo")
+        ]
+        return [path.relative_to(root) for path in itertools.chain(*video_files)]
+
+    def _create_annotation_files(self, root, video_files, fold, train):
+        current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1))
+        current_annotation = self._annotation_file_name(fold, train)
+        self._create_annotation_file(root, current_annotation, current_videos)
+
+        other_videos = set(video_files) - set(current_videos)
+        other_annotations = [
+            self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False))
+        ]
+        other_annotations.remove(current_annotation)
+        for name in other_annotations:
+            self._create_annotation_file(root, name, other_videos)
+
+        return len(current_videos)
+
+    def _annotation_file_name(self, fold, train):
+        return f"{'train' if train else 'test'}list{fold:02d}.txt"
+
+    def _create_annotation_file(self, root, name, video_files):
+        with open(pathlib.Path(root) / name, "w") as fh:
+            fh.writelines(f"{file}\n" for file in sorted(video_files))
+
+
 if __name__ == "__main__":
     unittest.main()
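For orientation, a rough sketch (paths are placeholders, not produced by the code above) of the constructor arguments the test harness is expected to assemble from inject_fake_data() plus the appended DEFAULT_FRAMES_PER_CLIP for one config:

    # Placeholder paths; in the real test these come from the temporary directory.
    video_folder, annotations_folder = "<tmpdir>/videos", "<tmpdir>/annotations"
    frames_per_clip = 1  # VideoDatasetTestCase.DEFAULT_FRAMES_PER_CLIP, appended by the wrapper
    config = {"fold": 1, "train": True}

    # Roughly equivalent to:
    #   torchvision.datasets.UCF101(video_folder, annotations_folder, frames_per_clip, **config)
    print((video_folder, annotations_folder, frames_per_clip), config)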