From f0246cd7fdffa2d3512118d5a6137ee1d21597b1 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 08:03:08 +0100
Subject: [PATCH 1/9] [DEBUG] run prototype tests on Windows

---
 .circleci/config.yml | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 94ac3e3fd23..75a01fdf5e4 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -335,27 +335,28 @@ jobs:
       file_or_dir: test/test_onnx.py

   unittest_prototype:
-    docker:
-      - image: circleci/python:3.7
+    executor: windows-cpu
+#    docker:
+#      - image: circleci/python:3.7
     resource_class: xlarge
     steps:
       - checkout
-      - run:
-          name: Download model weights
-          background: true
-          command: |
-            sudo apt update -qy && sudo apt install -qy parallel wget
-            mkdir -p ~/.cache/torch/hub/checkpoints
-            python scripts/collect_model_urls.py torchvision/prototype/models \
-              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
+#      - run:
+#          name: Download model weights
+#          background: true
+#          command: |
+#            sudo apt update -qy && sudo apt install -qy parallel wget
+#            mkdir -p ~/.cache/torch/hub/checkpoints
+#            python scripts/collect_model_urls.py torchvision/prototype/models \
+#              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
       - install_torchvision
       - install_prototype_dependencies
       - pip_install:
          args: scipy pycocotools
          descr: Install optional dependencies
-      - run:
-          name: Enable prototype tests
-          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
+#      - run:
+#          name: Enable prototype tests
+#          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
       - run_tests_selective:
          file_or_dir: test/test_prototype_*.py


From 113eca4a4fbbca6cfe171de036954f5df301c9aa Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 08:05:22 +0100
Subject: [PATCH 2/9] disable duplicate resource class

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 75a01fdf5e4..eb29decd209 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -338,7 +338,7 @@ jobs:
     executor: windows-cpu
 #    docker:
 #      - image: circleci/python:3.7
-    resource_class: xlarge
+#    resource_class: xlarge
     steps:
       - checkout
 #      - run:

From f53cf60ff5638e6c2e08a0ee2e73d646b051395f Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 08:51:49 +0100
Subject: [PATCH 3/9] fix newlines in mock data generation

---
 test/builtin_dataset_mocks.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py
index fc980326307..b01bd426c24 100644
--- a/test/builtin_dataset_mocks.py
+++ b/test/builtin_dataset_mocks.py
@@ -534,7 +534,7 @@ def imagenet(info, root, config):
     devkit_root.mkdir()
     data_root = devkit_root / "data"
     data_root.mkdir()
-    with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file:
+    with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w", newline="") as file:
         for label in torch.randint(0, len(wnids), (num_samples,)).tolist():
             file.write(f"{label}\n")
     make_tar(root, f"{devkit_root}.tar.gz", devkit_root, compression="gz")
@@ -672,7 +672,7 @@ def _make_split_files(cls, root_map):
         }

         for split, ids in ids_map.items():
-            with open(root_map[split] / f"{split}.txt", "w") as fh:
+            with open(root_map[split] / f"{split}.txt", "w", newline="") as fh:
                 fh.writelines(f"{id}\n" for id in ids)

         return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()}
@@ -738,11 +738,11 @@ def semeion(info, root, config):
     images = torch.rand(num_samples, 256)
     labels = one_hot(torch.randint(len(info.categories), size=(num_samples,)))

-    with open(root / "semeion.data", "w") as fh:
+    with open(root / "semeion.data", "w", newline="") as fh:
         for image, one_hot_label in zip(images, labels):
             image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image])
             labels_columns = " ".join([str(label.item()) for label in one_hot_label])
-            fh.write(f"{image_columns} {labels_columns}\n")
+            fh.write(f"{image_columns} {labels_columns}\r\n")

     return num_samples

@@ -780,7 +780,7 @@ def _make_split_files(cls, root, *, year, trainval):
             task_folder = split_folder / task_sub_folder
             task_folder.mkdir(parents=True, exist_ok=True)
             for split, ids in ids_map.items():
-                with open(task_folder / f"{split}.txt", "w") as fh:
+                with open(task_folder / f"{split}.txt", "w", newline="") as fh:
                     fh.writelines(f"{id}\n" for id in ids)

         return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()}
@@ -857,7 +857,7 @@ def voc(info, root, config):
 class CelebAMockData:
     @classmethod
     def _make_ann_file(cls, root, name, data, *, field_names=None):
-        with open(root / name, "w") as file:
+        with open(root / name, "w", newline="") as file:
             if field_names:
                 file.write(f"{len(data)}\r\n")
                 file.write(" ".join(field_names) + "\r\n")
@@ -971,13 +971,13 @@ def dtd(info, root, _):
     meta_folder = data_folder / "labels"
     meta_folder.mkdir()

-    with open(meta_folder / "labels_joint_anno.txt", "w") as file:
+    with open(meta_folder / "labels_joint_anno.txt", "w", newline="") as file:
         for cls, image_ids in image_ids_per_category.items():
             for image_id in image_ids:
                 joint_categories = random.choices(
                     list(categories - {cls}), k=int(torch.randint(len(categories) - 1, ()))
                 )
-                file.write(" ".join([image_id, *sorted([cls, *joint_categories])]) + "\n")
+                file.write(" ".join([image_id, *sorted([cls, *joint_categories])]) + " \n")

     image_ids = list(itertools.chain(*image_ids_per_category.values()))
     splits = ("train", "val", "test")
@@ -986,7 +986,7 @@ def dtd(info, root, _):
         random.shuffle(image_ids)
         for offset, split in enumerate(splits):
             image_ids_in_config = image_ids[offset :: len(splits)]
-            with open(meta_folder / f"{split}{fold}.txt", "w") as file:
+            with open(meta_folder / f"{split}{fold}.txt", "w", newline="") as file:
                 file.write("\n".join(image_ids_in_config) + "\n")
             num_samples_map[info.make_config(split=split, fold=str(fold))] = len(image_ids_in_config)
@@ -1104,7 +1104,7 @@ def generate(self, root):
         num_samples_map = {}
         for offset, split in enumerate(splits):
             split_and_classification_anns_in_split = split_and_classification_anns[offset :: len(splits)]
-            with open(anns_folder / f"{split}.txt", "w") as file:
+            with open(anns_folder / f"{split}.txt", "w", newline="") as file:
                 writer = csv.writer(file, delimiter=" ")
                 for split_and_classification_ann in split_and_classification_anns_in_split:
                     writer.writerow(split_and_classification_ann)
@@ -1171,7 +1171,7 @@ def _make_archive(cls, root):
         image_files = cls._make_images(images_folder)
         image_ids = list(range(1, len(image_files) + 1))

-        with open(archive_folder / "images.txt", "w") as file:
+        with open(archive_folder / "images.txt", "w", newline="") as file:
             file.write(
                 "\n".join(
                     f"{id} {path.relative_to(images_folder).as_posix()}" for id, path in zip(image_ids, image_files)
@@ -1181,10 +1181,10 @@ def _make_archive(cls, root):
         split_ids = torch.randint(2, (len(image_ids),)).tolist()
         counts = Counter(split_ids)
         num_samples_map = {"train": counts[1], "test": counts[0]}
-        with open(archive_folder / "train_test_split.txt", "w") as file:
+        with open(archive_folder / "train_test_split.txt", "w", newline="") as file:
             file.write("\n".join(f"{image_id} {split_id}" for image_id, split_id in zip(image_ids, split_ids)))

-        with open(archive_folder / "bounding_boxes.txt", "w") as file:
+        with open(archive_folder / "bounding_boxes.txt", "w", newline="") as file:
             file.write(
                 "\n".join(
                     " ".join(
@@ -1237,7 +1237,7 @@ def _make_splits(cls, root, image_files):
             image_files_in_split = image_files[offset :: len(splits)]

             split_file = split_folder / f"{split}.txt"
-            with open(split_file, "w") as file:
+            with open(split_file, "w", newline="") as file:
                 file.write(
                     "\n".join(
                         sorted(

From e8372209cface90ca00ebaceb71534905093f11b Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 09:08:11 +0100
Subject: [PATCH 4/9] fix path matching for DTD

---
 torchvision/prototype/datasets/_builtin/dtd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/prototype/datasets/_builtin/dtd.py b/torchvision/prototype/datasets/_builtin/dtd.py
index 36990e8a21d..398243b8f55 100644
--- a/torchvision/prototype/datasets/_builtin/dtd.py
+++ b/torchvision/prototype/datasets/_builtin/dtd.py
@@ -72,7 +72,7 @@ def _classify_archive(self, data: Tuple[str, Any]) -> Optional[int]:

     def _image_key_fn(self, data: Tuple[str, Any]) -> str:
         path = pathlib.Path(data[0])
-        return str(path.relative_to(path.parents[1]))
+        return str(path.relative_to(path.parents[1]).as_posix())

     def _collate_and_decode_sample(
         self,

From 6790971820f806fe92007f62c4e216f9c1f35a7e Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 09:51:43 +0100
Subject: [PATCH 5/9] revert CI changes

---
 .circleci/config.yml | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index eb29decd209..94ac3e3fd23 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -335,28 +335,27 @@ jobs:
       file_or_dir: test/test_onnx.py

   unittest_prototype:
-    executor: windows-cpu
-#    docker:
-#      - image: circleci/python:3.7
-#    resource_class: xlarge
+    docker:
+      - image: circleci/python:3.7
+    resource_class: xlarge
     steps:
       - checkout
-#      - run:
-#          name: Download model weights
-#          background: true
-#          command: |
-#            sudo apt update -qy && sudo apt install -qy parallel wget
-#            mkdir -p ~/.cache/torch/hub/checkpoints
-#            python scripts/collect_model_urls.py torchvision/prototype/models \
-#              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
+      - run:
+          name: Download model weights
+          background: true
+          command: |
+            sudo apt update -qy && sudo apt install -qy parallel wget
+            mkdir -p ~/.cache/torch/hub/checkpoints
+            python scripts/collect_model_urls.py torchvision/prototype/models \
+              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
       - install_torchvision
       - install_prototype_dependencies
       - pip_install:
          args: scipy pycocotools
          descr: Install optional dependencies
-#      - run:
-#          name: Enable prototype tests
-#          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
+      - run:
+          name: Enable prototype tests
+          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
       - run_tests_selective:
          file_or_dir: test/test_prototype_*.py


From 00b10c7a69123bd70e9bf0991bf224c78fcb4856 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 10:21:42 +0100
Subject: [PATCH 6/9] Revert "fix newlines in mock data generation"

This reverts commit f53cf60ff5638e6c2e08a0ee2e73d646b051395f.

---
 test/builtin_dataset_mocks.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py
index b01bd426c24..fc980326307 100644
--- a/test/builtin_dataset_mocks.py
+++ b/test/builtin_dataset_mocks.py
@@ -534,7 +534,7 @@ def imagenet(info, root, config):
     devkit_root.mkdir()
     data_root = devkit_root / "data"
     data_root.mkdir()
-    with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w", newline="") as file:
+    with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file:
         for label in torch.randint(0, len(wnids), (num_samples,)).tolist():
             file.write(f"{label}\n")
     make_tar(root, f"{devkit_root}.tar.gz", devkit_root, compression="gz")
@@ -672,7 +672,7 @@ def _make_split_files(cls, root_map):
         }

         for split, ids in ids_map.items():
-            with open(root_map[split] / f"{split}.txt", "w", newline="") as fh:
+            with open(root_map[split] / f"{split}.txt", "w") as fh:
                 fh.writelines(f"{id}\n" for id in ids)

         return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()}
@@ -738,11 +738,11 @@ def semeion(info, root, config):
     images = torch.rand(num_samples, 256)
     labels = one_hot(torch.randint(len(info.categories), size=(num_samples,)))

-    with open(root / "semeion.data", "w", newline="") as fh:
+    with open(root / "semeion.data", "w") as fh:
         for image, one_hot_label in zip(images, labels):
             image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image])
             labels_columns = " ".join([str(label.item()) for label in one_hot_label])
-            fh.write(f"{image_columns} {labels_columns}\r\n")
+            fh.write(f"{image_columns} {labels_columns}\n")

     return num_samples

@@ -780,7 +780,7 @@ def _make_split_files(cls, root, *, year, trainval):
             task_folder = split_folder / task_sub_folder
             task_folder.mkdir(parents=True, exist_ok=True)
             for split, ids in ids_map.items():
-                with open(task_folder / f"{split}.txt", "w", newline="") as fh:
+                with open(task_folder / f"{split}.txt", "w") as fh:
                     fh.writelines(f"{id}\n" for id in ids)

         return sorted(set(itertools.chain(*ids_map.values()))), {split: len(ids) for split, ids in ids_map.items()}
@@ -857,7 +857,7 @@ def voc(info, root, config):
 class CelebAMockData:
     @classmethod
     def _make_ann_file(cls, root, name, data, *, field_names=None):
-        with open(root / name, "w", newline="") as file:
+        with open(root / name, "w") as file:
             if field_names:
                 file.write(f"{len(data)}\r\n")
                 file.write(" ".join(field_names) + "\r\n")
@@ -971,13 +971,13 @@ def dtd(info, root, _):
     meta_folder = data_folder / "labels"
     meta_folder.mkdir()

-    with open(meta_folder / "labels_joint_anno.txt", "w", newline="") as file:
+    with open(meta_folder / "labels_joint_anno.txt", "w") as file:
         for cls, image_ids in image_ids_per_category.items():
             for image_id in image_ids:
                 joint_categories = random.choices(
                     list(categories - {cls}), k=int(torch.randint(len(categories) - 1, ()))
                 )
-                file.write(" ".join([image_id, *sorted([cls, *joint_categories])]) + " \n")
+                file.write(" ".join([image_id, *sorted([cls, *joint_categories])]) + "\n")

     image_ids = list(itertools.chain(*image_ids_per_category.values()))
     splits = ("train", "val", "test")
@@ -986,7 +986,7 @@ def dtd(info, root, _):
         random.shuffle(image_ids)
         for offset, split in enumerate(splits):
             image_ids_in_config = image_ids[offset :: len(splits)]
-            with open(meta_folder / f"{split}{fold}.txt", "w", newline="") as file:
+            with open(meta_folder / f"{split}{fold}.txt", "w") as file:
                 file.write("\n".join(image_ids_in_config) + "\n")
             num_samples_map[info.make_config(split=split, fold=str(fold))] = len(image_ids_in_config)
@@ -1104,7 +1104,7 @@ def generate(self, root):
         num_samples_map = {}
         for offset, split in enumerate(splits):
             split_and_classification_anns_in_split = split_and_classification_anns[offset :: len(splits)]
-            with open(anns_folder / f"{split}.txt", "w", newline="") as file:
+            with open(anns_folder / f"{split}.txt", "w") as file:
                 writer = csv.writer(file, delimiter=" ")
                 for split_and_classification_ann in split_and_classification_anns_in_split:
                     writer.writerow(split_and_classification_ann)
@@ -1171,7 +1171,7 @@ def _make_archive(cls, root):
         image_files = cls._make_images(images_folder)
         image_ids = list(range(1, len(image_files) + 1))

-        with open(archive_folder / "images.txt", "w", newline="") as file:
+        with open(archive_folder / "images.txt", "w") as file:
             file.write(
                 "\n".join(
                     f"{id} {path.relative_to(images_folder).as_posix()}" for id, path in zip(image_ids, image_files)
@@ -1181,10 +1181,10 @@ def _make_archive(cls, root):
         split_ids = torch.randint(2, (len(image_ids),)).tolist()
         counts = Counter(split_ids)
         num_samples_map = {"train": counts[1], "test": counts[0]}
-        with open(archive_folder / "train_test_split.txt", "w", newline="") as file:
+        with open(archive_folder / "train_test_split.txt", "w") as file:
             file.write("\n".join(f"{image_id} {split_id}" for image_id, split_id in zip(image_ids, split_ids)))

-        with open(archive_folder / "bounding_boxes.txt", "w", newline="") as file:
+        with open(archive_folder / "bounding_boxes.txt", "w") as file:
             file.write(
                 "\n".join(
                     " ".join(
@@ -1237,7 +1237,7 @@ def _make_splits(cls, root, image_files):
             image_files_in_split = image_files[offset :: len(splits)]

             split_file = split_folder / f"{split}.txt"
-            with open(split_file, "w", newline="") as file:
+            with open(split_file, "w") as file:
                 file.write(
                     "\n".join(
                         sorted(

From edb9479189bda87f6046c4e927223f9053c55c38 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 10:23:33 +0100
Subject: [PATCH 7/9] rerun tests with fix in torchdata

---
 .circleci/config.yml | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 94ac3e3fd23..4796c3f4198 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -155,7 +155,7 @@ commands:
   install_prototype_dependencies:
     steps:
       - pip_install:
-          args: iopath git+https://github.com/pytorch/data
+          args: iopath git+https://github.com/pmeier/data@fix-strip-newline
          descr: Install prototype dependencies

# Most of the test suite is handled by the `unittest` jobs, with completely different workflow and setup.
@@ -335,27 +335,28 @@ jobs:
       file_or_dir: test/test_onnx.py

   unittest_prototype:
-    docker:
-      - image: circleci/python:3.7
-    resource_class: xlarge
+    executor: windows-cpu
+#    docker:
+#      - image: circleci/python:3.7
+#    resource_class: xlarge
     steps:
       - checkout
-      - run:
-          name: Download model weights
-          background: true
-          command: |
-            sudo apt update -qy && sudo apt install -qy parallel wget
-            mkdir -p ~/.cache/torch/hub/checkpoints
-            python scripts/collect_model_urls.py torchvision/prototype/models \
-              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
+#      - run:
+#          name: Download model weights
+#          background: true
+#          command: |
+#            sudo apt update -qy && sudo apt install -qy parallel wget
+#            mkdir -p ~/.cache/torch/hub/checkpoints
+#            python scripts/collect_model_urls.py torchvision/prototype/models \
+#              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
       - install_torchvision
       - install_prototype_dependencies
       - pip_install:
          args: scipy pycocotools
          descr: Install optional dependencies
-      - run:
-          name: Enable prototype tests
-          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
+#      - run:
+#          name: Enable prototype tests
+#          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
       - run_tests_selective:
          file_or_dir: test/test_prototype_*.py


From 8e32a658b92af160cf5be8946318771464336494 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Thu, 20 Jan 2022 10:43:30 +0100
Subject: [PATCH 8/9] revert CI changes

---
 .circleci/config.yml | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4796c3f4198..94ac3e3fd23 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -155,7 +155,7 @@ commands:
   install_prototype_dependencies:
     steps:
       - pip_install:
-          args: iopath git+https://github.com/pmeier/data@fix-strip-newline
+          args: iopath git+https://github.com/pytorch/data
          descr: Install prototype dependencies

# Most of the test suite is handled by the `unittest` jobs, with completely different workflow and setup.
@@ -335,28 +335,27 @@ jobs:
       file_or_dir: test/test_onnx.py

   unittest_prototype:
-    executor: windows-cpu
-#    docker:
-#      - image: circleci/python:3.7
-#    resource_class: xlarge
+    docker:
+      - image: circleci/python:3.7
+    resource_class: xlarge
     steps:
       - checkout
-#      - run:
-#          name: Download model weights
-#          background: true
-#          command: |
-#            sudo apt update -qy && sudo apt install -qy parallel wget
-#            mkdir -p ~/.cache/torch/hub/checkpoints
-#            python scripts/collect_model_urls.py torchvision/prototype/models \
-#              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
+      - run:
+          name: Download model weights
+          background: true
+          command: |
+            sudo apt update -qy && sudo apt install -qy parallel wget
+            mkdir -p ~/.cache/torch/hub/checkpoints
+            python scripts/collect_model_urls.py torchvision/prototype/models \
+              | parallel -j0 'wget --no-verbose -O ~/.cache/torch/hub/checkpoints/`basename {}` {}\?source=ci'
       - install_torchvision
       - install_prototype_dependencies
       - pip_install:
          args: scipy pycocotools
          descr: Install optional dependencies
-#      - run:
-#          name: Enable prototype tests
-#          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
+      - run:
+          name: Enable prototype tests
+          command: echo 'export PYTORCH_TEST_WITH_PROTOTYPE=1' >> $BASH_ENV
       - run_tests_selective:
          file_or_dir: test/test_prototype_*.py


From 63e473fc14bee6260525f744e812925e6b13a19f Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Mon, 31 Jan 2022 17:59:06 +0100
Subject: [PATCH 9/9] add explanation

---
 torchvision/prototype/datasets/_builtin/dtd.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchvision/prototype/datasets/_builtin/dtd.py b/torchvision/prototype/datasets/_builtin/dtd.py
index 398243b8f55..fc3ec61efc7 100644
--- a/torchvision/prototype/datasets/_builtin/dtd.py
+++ b/torchvision/prototype/datasets/_builtin/dtd.py
@@ -72,6 +72,7 @@ def _classify_archive(self, data: Tuple[str, Any]) -> Optional[int]:

     def _image_key_fn(self, data: Tuple[str, Any]) -> str:
         path = pathlib.Path(data[0])
+        # The split files contain hardcoded posix paths for the images, e.g. banded/banded_0001.jpg
         return str(path.relative_to(path.parents[1]).as_posix())

     def _collate_and_decode_sample(
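
Editor's note: the substantive fixes in this series are the newline="" arguments in the
mock-data writers (patch 3, later reverted in favor of the torchdata-side fix exercised in
patch 7) and the as_posix() call in the DTD image key (patches 4 and 9). Both work around
Windows-specific behavior of the Python standard library. The following standalone sketch
illustrates the two mechanisms; it is not part of the patches, and the file and path names
in it are made up for illustration.

    # Minimal sketch of the two Windows behaviors targeted by this series.
    import pathlib
    import tempfile

    # 1. Newline translation: files opened in text mode translate "\n" to
    #    os.linesep on write ("\r\n" on Windows). Passing newline="" disables
    #    the translation, so mock files contain identical bytes on every OS.
    with tempfile.TemporaryDirectory() as root:
        path = pathlib.Path(root) / "train.txt"
        with open(path, "w", newline="") as fh:
            fh.write("banded_0001\n")
        assert path.read_bytes() == b"banded_0001\n"  # no "\r" inserted, even on Windows

    # 2. Path separators: pathlib renders paths with backslashes on Windows,
    #    while the DTD split files hardcode posix paths, so keys built from
    #    paths must be normalized with as_posix() before matching.
    path = pathlib.PureWindowsPath("dtd", "images", "banded", "banded_0001.jpg")
    assert str(path.relative_to(path.parents[1])) == "banded\\banded_0001.jpg"  # the mismatch
    assert path.relative_to(path.parents[1]).as_posix() == "banded/banded_0001.jpg"  # the fix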