From 2dd7d6c87757c520f3111c9b76a22b93c3436ec0 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Wed, 21 Oct 2020 01:26:13 -0400
Subject: [PATCH 01/44] initial commit of widerface dataset

---
 torchvision/datasets/__init__.py  |   6 +-
 torchvision/datasets/widerface.py | 167 ++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+), 2 deletions(-)
 create mode 100644 torchvision/datasets/widerface.py

diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py
index 4531d937c68..0ce0fd6bd60 100644
--- a/torchvision/datasets/__init__.py
+++ b/torchvision/datasets/__init__.py
@@ -16,6 +16,7 @@
 from .imagenet import ImageNet
 from .caltech import Caltech101, Caltech256
 from .celeba import CelebA
+from .widerface import WIDERFace
 from .sbd import SBDataset
 from .vision import VisionDataset
 from .usps import USPS
@@ -31,5 +32,6 @@
            'MNIST', 'KMNIST', 'STL10', 'SVHN', 'PhotoTour', 'SEMEION',
            'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
            'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
-           'Caltech101', 'Caltech256', 'CelebA', 'SBDataset', 'VisionDataset',
-           'USPS', 'Kinetics400', 'HMDB51', 'UCF101', 'Places365')
+           'Caltech101', 'Caltech256', 'CelebA', 'WIDERFace', 'SBDataset',
+           'VisionDataset', 'USPS', 'Kinetics400', 'HMDB51', 'UCF101',
+           'Places365')
diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
new file mode 100644
index 00000000000..c97b8b168f3
--- /dev/null
+++ b/torchvision/datasets/widerface.py
@@ -0,0 +1,167 @@
+from .vision import VisionDataset
+import warnings
+from PIL import Image
+import os
+import os.path
+import numpy as np
+import torch
+import codecs
+import string
+import gzip
+import lzma
+from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union
+from .utils import download_and_extract_archive
+
+
+class WIDERFace(VisionDataset):
+    """`WIDERFace <http://yann.lecun.com/exdb/mnist/>`_ Dataset.
+
+    WIDER FACE dataset is a face detection benchmark dataset, of which images are 
+    selected from the publicly available WIDER dataset. We choose 32,203 images and 
+    label 393,703 faces with a high degree of variability in scale, pose and 
+    occlusion as depicted in the sample images. WIDER FACE dataset is organized 
+    based on 61 event classes. For each event class, we randomly select 40%/10%/50% 
+    data as training, validation and testing sets. We adopt the same evaluation 
+    metric employed in the PASCAL VOC dataset. Similar to MALF and Caltech datasets,
+    we do not release bounding box ground truth for the test images. Users are 
+    required to submit final prediction files, which we shall proceed to evaluate.
+
+    @inproceedings{yang2016wider,
+	    Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
+	    Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+	    Title = {WIDER FACE: A Face Detection Benchmark},
+	    Year = {2016}}
+
+    Args:
+        root (string): Root directory of dataset where ``MNIST/processed/training.pt``
+            and  ``MNIST/processed/test.pt`` exist.
+        split (string): One of {'train', 'valid', 'test', 'all'}.
+            The specified dataset is selected.
+            Defaults to ``train``.
+        target_type (string or list, optional): Type of target to use, ``attr``, ``identity``, ``bbox``,
+            or ``landmarks``. Can also be a list to output a tuple with all specified target types.
+            The targets represent:
+                ``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes
+                ``identity`` (int): label for each person (data points with the same identity are the same person)
+                ``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height)
+                ``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
+                    righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)
+            Defaults to ``attr``. If empty, ``None`` will be returned as target.
+        transform (callable, optional): A function/transform that  takes in a PIL image
+            and returns a transformed version. E.g, ``transforms.RandomCrop``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If true, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    file_list = [
+        # Download URL                                                                  MD5 Hash                            Filename
+        # ("https://drive.google.com/uc?export=download&id=0B6eKvaijfFUDQUUwd21EckhUbWs", "3fedf70df600953d25982bcd13d91ba2", "WIDER_train.zip"),
+        ("https://drive.google.com/uc?export=download&id=0B6eKvaijfFUDd3dIRmpvSk8tLUk", "dfa7d7e790efa35df3788964cf0bbaea", "WIDER_val.zip"),
+        # ("https://drive.google.com/uc?export=download&id=0B6eKvaijfFUDbW4tdGpaYjgzZkU", "e5d8f4248ed24c334bbd12f49c29dd40", "WIDER_test.zip")
+    ]
+
+    def __init__(
+            self,
+            root: str,
+            split: str = "train",
+            target_type: Union[List[str], str] = "attr",
+            transform: Optional[Callable] = None,
+            target_transform: Optional[Callable] = None,
+            download: bool = False,
+    ) -> None:
+        super(WIDERFace, self).__init__(root, transform=transform,
+                                    target_transform=target_transform)
+        print("root dir: " + root)
+        print(self.root)
+        self.split = split
+
+        if isinstance(target_type, list):
+            self.target_type = target_type
+        else:
+            self.target_type = [target_type]
+
+        if not self.target_type and self.target_transform is not None:
+            raise RuntimeError('target_transform is specified but target_type is empty')
+
+        # if download:
+        #     self.download()
+
+        # if not self._check_exists():
+        #     raise RuntimeError('Dataset not found.' +
+        #                        ' You can use download=True to download it')
+        
+        print("done downloading wider face")
+
+        # if self.train:
+        #     data_file = self.training_file
+        # else:
+        #     data_file = self.test_file
+        # self.data, self.targets = torch.load(os.path.join(self.processed_folder, data_file))
+    
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, target) where target is index of the target class.
+        """
+        img, target = self.data[index], int(self.targets[index])
+
+        # doing this so that it is consistent with all other datasets
+        # to return a PIL Image
+        img = Image.fromarray(img.numpy(), mode='L')
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        if self.target_transform is not None:
+            target = self.target_transform(target)
+
+        return img, target
+
+    def __len__(self) -> int:
+        return len(self.data)
+    
+    def download(self) -> None:
+        for (file_url, md5, filename) in self.file_list:
+            download_and_extract_archive(url=file_url, download_root=self.root, filename=filename, md5=md5)
+            # download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5)
+
+        # with zipfile.ZipFile(os.path.join(self.root, self.base_folder, "img_align_celeba.zip"), "r") as f:
+        #     f.extractall(os.path.join(self.root, self.base_folder))
+    
+    def download_mnist(self) -> None:
+        """Download the MNIST data if it doesn't exist in processed_folder already."""
+
+        if self._check_exists():
+            return
+
+        os.makedirs(self.raw_folder, exist_ok=True)
+        os.makedirs(self.processed_folder, exist_ok=True)
+
+        # download files
+        for url, md5 in self.resources:
+            filename = url.rpartition('/')[2]
+            download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
+
+        # process and save as torch files
+        print('Processing...')
+
+        training_set = (
+            read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
+            read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
+        )
+        test_set = (
+            read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
+            read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
+        )
+        with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
+            torch.save(training_set, f)
+        with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
+            torch.save(test_set, f)
+
+        print('Done!')

From f9e31c9286bdd386838382189f3c0fc3c40449cb Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Wed, 21 Oct 2020 18:47:28 -0400
Subject: [PATCH 02/44] comment out old code

---
 torchvision/datasets/widerface.py | 62 +++++++++++++++----------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index c97b8b168f3..41587cbaa22 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -134,34 +134,34 @@ def download(self) -> None:
         # with zipfile.ZipFile(os.path.join(self.root, self.base_folder, "img_align_celeba.zip"), "r") as f:
         #     f.extractall(os.path.join(self.root, self.base_folder))
     
-    def download_mnist(self) -> None:
-        """Download the MNIST data if it doesn't exist in processed_folder already."""
-
-        if self._check_exists():
-            return
-
-        os.makedirs(self.raw_folder, exist_ok=True)
-        os.makedirs(self.processed_folder, exist_ok=True)
-
-        # download files
-        for url, md5 in self.resources:
-            filename = url.rpartition('/')[2]
-            download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
-
-        # process and save as torch files
-        print('Processing...')
-
-        training_set = (
-            read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
-            read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
-        )
-        test_set = (
-            read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
-            read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
-        )
-        with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
-            torch.save(training_set, f)
-        with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
-            torch.save(test_set, f)
-
-        print('Done!')
+    # def download_mnist(self) -> None:
+    #     """Download the MNIST data if it doesn't exist in processed_folder already."""
+
+    #     if self._check_exists():
+    #         return
+
+    #     os.makedirs(self.raw_folder, exist_ok=True)
+    #     os.makedirs(self.processed_folder, exist_ok=True)
+
+    #     # download files
+    #     for url, md5 in self.resources:
+    #         filename = url.rpartition('/')[2]
+    #         download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
+
+    #     # process and save as torch files
+    #     print('Processing...')
+
+    #     training_set = (
+    #         read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
+    #         read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
+    #     )
+    #     test_set = (
+    #         read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
+    #         read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
+    #     )
+    #     with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
+    #         torch.save(training_set, f)
+    #     with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
+    #         torch.save(test_set, f)
+
+    #     print('Done!')

From e4ee45ff3b7e1659d85ccc5ac14bfa46931c1dd7 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 22 Oct 2020 01:55:45 -0400
Subject: [PATCH 03/44] improve parsing of annotation files

---
 torchvision/datasets/widerface.py | 196 ++++++++++++++++++++----------
 1 file changed, 129 insertions(+), 67 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 41587cbaa22..d6915103b1b 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -10,11 +10,11 @@
 import gzip
 import lzma
 from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union
-from .utils import download_and_extract_archive
+from .utils import download_file_from_google_drive, download_and_extract_archive, check_integrity
 
 
 class WIDERFace(VisionDataset):
-    """`WIDERFace <http://yann.lecun.com/exdb/mnist/>`_ Dataset.
+    """`WIDERFace <http://shuoyang1213.me/WIDERFACE/>`_ Dataset.
 
     WIDER FACE dataset is a face detection benchmark dataset, of which images are 
     selected from the publicly available WIDER dataset. We choose 32,203 images and 
@@ -33,8 +33,8 @@ class WIDERFace(VisionDataset):
 	    Year = {2016}}
 
     Args:
-        root (string): Root directory of dataset where ``MNIST/processed/training.pt``
-            and  ``MNIST/processed/test.pt`` exist.
+        root (string): Root directory of dataset where ``widerface/WIDER_train.zip widerface/WIDER_val.zip``
+            and  ``widerface/WIDER_test.zip widerface/wider_face_split.zip`` exist.
         split (string): One of {'train', 'valid', 'test', 'all'}.
             The specified dataset is selected.
             Defaults to ``train``.
@@ -56,12 +56,16 @@ class WIDERFace(VisionDataset):
             downloaded again.
     """
 
+    base_folder = "widerface"
     file_list = [
-        # Download URL                                                                  MD5 Hash                            Filename
-        # ("https://drive.google.com/uc?export=download&id=0B6eKvaijfFUDQUUwd21EckhUbWs", "3fedf70df600953d25982bcd13d91ba2", "WIDER_train.zip"),
-        ("https://drive.google.com/uc?export=download&id=0B6eKvaijfFUDd3dIRmpvSk8tLUk", "dfa7d7e790efa35df3788964cf0bbaea", "WIDER_val.zip"),
-        # ("https://drive.google.com/uc?export=download&id=0B6eKvaijfFUDbW4tdGpaYjgzZkU", "e5d8f4248ed24c334bbd12f49c29dd40", "WIDER_test.zip")
+        # File ID                        MD5 Hash                            Filename
+        ("0B6eKvaijfFUDQUUwd21EckhUbWs", "3fedf70df600953d25982bcd13d91ba2", "WIDER_train.zip"),
+        ("0B6eKvaijfFUDd3dIRmpvSk8tLUk", "dfa7d7e790efa35df3788964cf0bbaea", "WIDER_val.zip"),
+        ("0B6eKvaijfFUDbW4tdGpaYjgzZkU", "e5d8f4248ed24c334bbd12f49c29dd40", "WIDER_test.zip")
     ]
+    annotations_file = ("http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip",
+                        "0e3767bcf0e326556d407bf5bff5d27c",
+                        "wider_face_split.zip")
 
     def __init__(
             self,
@@ -74,26 +78,83 @@ def __init__(
     ) -> None:
         super(WIDERFace, self).__init__(root, transform=transform,
                                     target_transform=target_transform)
-        print("root dir: " + root)
-        print(self.root)
+        print("root dir: " + self.root)
+        self.imgs_path = []
+        self.words = []
         self.split = split
 
         if isinstance(target_type, list):
             self.target_type = target_type
         else:
             self.target_type = [target_type]
-
         if not self.target_type and self.target_transform is not None:
             raise RuntimeError('target_transform is specified but target_type is empty')
 
-        # if download:
-        #     self.download()
-
-        # if not self._check_exists():
-        #     raise RuntimeError('Dataset not found.' +
-        #                        ' You can use download=True to download it')
+        if download:
+            self.download()
         
-        print("done downloading wider face")
+        if not self._check_integrity():
+            raise RuntimeError("Dataset not found or corrupted. " +
+                               "You can use download=True to download it")
+
+        print("Finished initializing WIDERFace")
+
+        ann_file = os.path.expanduser(os.path.join(self.root, self.base_folder, "wider_face_split", "wider_face_train_bbx_gt.txt"))
+        print("ann_file: " + ann_file)
+        f = open(ann_file, "r")
+        lines = f.readlines()
+
+        isFile = True
+        isNumBoxes, isBoxAnnotation = False, False
+        num_boxes = 0
+        box_counter = 0
+        labels = []
+        for line in lines:
+            line = line.rstrip()
+            if isFile:
+                # print(line)
+                self.imgs_path.append(line)
+                isFile = False
+                isNumBoxes = True
+            elif isNumBoxes:
+                num_boxes = int(line)
+                isNumBoxes = False
+                isBoxAnnotation = True
+            elif isBoxAnnotation:
+                box_counter += 1
+                # line = line.split(" ")
+                # line = [int(x) for x in line]
+                # labels.append(line)
+                if box_counter == num_boxes:
+                    isBoxAnnotation = False
+                    isFile = True
+                    # print("read {} bounding boxes".format(box_counter))
+                    # self.words.append(labels.copy())
+                    box_counter = 0
+                    # labels.clear()
+            else:
+                print("ERROR parsing annotations file")
+
+        # isFirst = True
+        # labels = []
+        # for line in lines:
+        #     line = line.rstrip()
+        #     if line.startswith("#"):
+        #         if isFirst is True:
+        #             isFirst = False
+        #         else:
+        #             labels_copy = labels.copy()
+        #             self.words.append(labels_copy)
+        #             labels.clear()
+        #         path = line[2:]
+        #         path = ann_file.replace("label.txt","images/") + path
+        #         self.imgs_path.append(path)
+        #     else:
+        #         line = line.split(" ")
+        #         label = [float(x) for x in line]
+        #         labels.append(label)
+        # self.words.append(labels)
+
 
         # if self.train:
         #     data_file = self.training_file
@@ -109,59 +170,60 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
         Returns:
             tuple: (image, target) where target is index of the target class.
         """
-        img, target = self.data[index], int(self.targets[index])
+        # img, target = self.data[index], int(self.targets[index])
 
-        # doing this so that it is consistent with all other datasets
-        # to return a PIL Image
-        img = Image.fromarray(img.numpy(), mode='L')
+        # # doing this so that it is consistent with all other datasets
+        # # to return a PIL Image
+        # img = Image.fromarray(img.numpy(), mode='L')
 
-        if self.transform is not None:
-            img = self.transform(img)
+        # if self.transform is not None:
+        #     img = self.transform(img)
 
-        if self.target_transform is not None:
-            target = self.target_transform(target)
+        # if self.target_transform is not None:
+        #     target = self.target_transform(target)
 
-        return img, target
+        # return img, target
+        return 0, 1
 
     def __len__(self) -> int:
-        return len(self.data)
-    
-    def download(self) -> None:
-        for (file_url, md5, filename) in self.file_list:
-            download_and_extract_archive(url=file_url, download_root=self.root, filename=filename, md5=md5)
-            # download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5)
+        return len(self.imgs_path)
 
-        # with zipfile.ZipFile(os.path.join(self.root, self.base_folder, "img_align_celeba.zip"), "r") as f:
-        #     f.extractall(os.path.join(self.root, self.base_folder))
-    
-    # def download_mnist(self) -> None:
-    #     """Download the MNIST data if it doesn't exist in processed_folder already."""
-
-    #     if self._check_exists():
-    #         return
-
-    #     os.makedirs(self.raw_folder, exist_ok=True)
-    #     os.makedirs(self.processed_folder, exist_ok=True)
-
-    #     # download files
-    #     for url, md5 in self.resources:
-    #         filename = url.rpartition('/')[2]
-    #         download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
-
-    #     # process and save as torch files
-    #     print('Processing...')
-
-    #     training_set = (
-    #         read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
-    #         read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
-    #     )
-    #     test_set = (
-    #         read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
-    #         read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
-    #     )
-    #     with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
-    #         torch.save(training_set, f)
-    #     with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
-    #         torch.save(test_set, f)
-
-    #     print('Done!')
+    # TODO - checking integrity of the annotations_file is not working
+    def _check_integrity(self) -> bool:
+        all_files = self.file_list.copy()
+        all_files.append(self.annotations_file)
+
+        for (_, md5, filename) in all_files:
+            fpath = os.path.join(self.root, self.base_folder, filename)
+            _, ext = os.path.splitext(filename)
+            # Allow original archive to be deleted (zip and 7z)
+            # Only need the extracted images
+            print("filename: " + fpath)
+            if ext not in [".zip", ".7z"] and not check_integrity(fpath, md5):
+                return False
+
+        # Should check a hash of the images
+        return os.path.isdir(os.path.join(self.root, self.base_folder, "WIDER_train"))
+
+    def download(self) -> None:
+        import zipfile
+
+        # if self._check_integrity():
+        #     print('Files already downloaded and verified')
+        #     return
+
+        # download data
+        for (file_id, md5, filename) in self.file_list:
+            download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5)
+
+        # extract data
+        for (file_id, md5, filename) in self.file_list:
+            with zipfile.ZipFile(os.path.join(self.root, self.base_folder, filename), "r") as f:
+                f.extractall(os.path.join(self.root, self.base_folder))
+
+        # download and extract annotations files
+        download_and_extract_archive(url=self.annotations_file[0],
+                                     download_root=os.path.join(self.root, self.base_folder),
+                                     extract_root=os.path.join(self.root, self.base_folder),
+                                     filename=self.annotations_file[2],
+                                     md5=self.annotations_file[1])

From 9eae6964c473ec6f8a4f304c949fc60a41b50364 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 22 Oct 2020 16:01:52 -0400
Subject: [PATCH 04/44] code cleanup and fix docstring comments

---
 torchvision/datasets/widerface.py | 198 ++++++++++++++++--------------
 1 file changed, 106 insertions(+), 92 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index d6915103b1b..d6662886c56 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -16,16 +16,7 @@
 class WIDERFace(VisionDataset):
     """`WIDERFace <http://shuoyang1213.me/WIDERFACE/>`_ Dataset.
 
-    WIDER FACE dataset is a face detection benchmark dataset, of which images are 
-    selected from the publicly available WIDER dataset. We choose 32,203 images and 
-    label 393,703 faces with a high degree of variability in scale, pose and 
-    occlusion as depicted in the sample images. WIDER FACE dataset is organized 
-    based on 61 event classes. For each event class, we randomly select 40%/10%/50% 
-    data as training, validation and testing sets. We adopt the same evaluation 
-    metric employed in the PASCAL VOC dataset. Similar to MALF and Caltech datasets,
-    we do not release bounding box ground truth for the test images. Users are 
-    required to submit final prediction files, which we shall proceed to evaluate.
-
+    Citation:
     @inproceedings{yang2016wider,
 	    Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
 	    Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
@@ -33,20 +24,24 @@ class WIDERFace(VisionDataset):
 	    Year = {2016}}
 
     Args:
-        root (string): Root directory of dataset where ``widerface/WIDER_train.zip widerface/WIDER_val.zip``
-            and  ``widerface/WIDER_test.zip widerface/wider_face_split.zip`` exist.
-        split (string): One of {'train', 'valid', 'test', 'all'}.
-            The specified dataset is selected.
-            Defaults to ``train``.
-        target_type (string or list, optional): Type of target to use, ``attr``, ``identity``, ``bbox``,
-            or ``landmarks``. Can also be a list to output a tuple with all specified target types.
+        root (string): Root directory of the WIDERFace Dataset.
+            Expects the following structure:
+                .
+                └── widerface
+                    ├── wider_face_split.zip
+                    ├── WIDER_test.zip
+                    ├── WIDER_train.zip
+                    └── WIDER_val.zip
+        split (string): One of {``train``, ``val``, ``test``}.
+            The dataset split to use. Defaults to ``train``.
+        target_type (string): The type of target to use, can be one of {``raw``, ``bbox``, ``attr``.``""``}
+            Can also be a list to output a tuple with all specified target types.
             The targets represent:
-                ``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes
-                ``identity`` (int): label for each person (data points with the same identity are the same person)
-                ``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height)
-                ``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
-                    righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)
-            Defaults to ``attr``. If empty, ``None`` will be returned as target.
+                ``raw`` (torch.tensor shape=(10,) dtype=int): all annotations combined (bbox + attr)
+                ``bbox`` (torch.tensor shape=(4,) dtype=int): bounding box (x, y, width, height)
+                ``attr`` (torch.tensor shape=(6,) dtype=int): label values for attributes
+                    that represent (blur, expression, illumination, occlusion, pose, invalid)
+            Defaults to ``raw``. If empty, ``None`` will be returned as target.
         transform (callable, optional): A function/transform that  takes in a PIL image
             and returns a transformed version. E.g, ``transforms.RandomCrop``
         target_transform (callable, optional): A function/transform that takes in the
@@ -71,119 +66,134 @@ def __init__(
             self,
             root: str,
             split: str = "train",
-            target_type: Union[List[str], str] = "attr",
+            target_type: Union[List[str], str] = "raw",
             transform: Optional[Callable] = None,
             target_transform: Optional[Callable] = None,
             download: bool = False,
     ) -> None:
         super(WIDERFace, self).__init__(root, transform=transform,
-                                    target_transform=target_transform)
+                                        target_transform=target_transform)
         print("root dir: " + self.root)
-        self.imgs_path = []
-        self.words = []
+        
+        # check arguments
+        if split not in ("train","val","test"):
+            raise ValueError("split \"{}\" is not recognized.".format(split))
         self.split = split
 
         if isinstance(target_type, list):
             self.target_type = target_type
         else:
             self.target_type = [target_type]
+        if not (all(x in ["raw","bbox","attr",""] for x in self.target_type)):
+            raise ValueError("target_type \"{}\" is not recognized.".format(self.target_type))
         if not self.target_type and self.target_transform is not None:
             raise RuntimeError('target_transform is specified but target_type is empty')
 
+        # prepare dataset
+        self.imgs_path = []
+        self.raw_annotations = []
+
         if download:
             self.download()
         
         if not self._check_integrity():
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download it")
+        
+        # process dataset
+        if self.split in ("train","val"):
+            self.parse_train_val_annotations_file()
+        elif self.split == "test":
+            self.parse_test_annotations_file()
+        else:
+            raise ValueError("split \"{}\" is not recognized.".format(self.split))
 
-        print("Finished initializing WIDERFace")
+    def parse_train_val_annotations_file(self):
+        filename = "wider_face_train_bbx_gt.txt" if self.split == "train" else "wider_face_val_bbx_gt.txt"
+        filepath = os.path.join(self.root, self.base_folder, "wider_face_split", filename)
 
-        ann_file = os.path.expanduser(os.path.join(self.root, self.base_folder, "wider_face_split", "wider_face_train_bbx_gt.txt"))
-        print("ann_file: " + ann_file)
-        f = open(ann_file, "r")
+        f = open(filepath, "r")
         lines = f.readlines()
 
-        isFile = True
-        isNumBoxes, isBoxAnnotation = False, False
-        num_boxes = 0
-        box_counter = 0
+        file_name_line, num_boxes_line, box_annotation_line = True, False, False
+        num_boxes, box_counter = 0, 0
         labels = []
         for line in lines:
             line = line.rstrip()
-            if isFile:
+            if file_name_line:
                 # print(line)
-                self.imgs_path.append(line)
-                isFile = False
-                isNumBoxes = True
-            elif isNumBoxes:
+                abs_path = os.path.join(self.root, self.base_folder, "WIDER_"+self.split, "images", line)
+                self.imgs_path.append(abs_path)
+                file_name_line = False
+                num_boxes_line = True
+            elif num_boxes_line:
                 num_boxes = int(line)
-                isNumBoxes = False
-                isBoxAnnotation = True
-            elif isBoxAnnotation:
+                num_boxes_line = False
+                box_annotation_line = True
+            elif box_annotation_line:
                 box_counter += 1
-                # line = line.split(" ")
-                # line = [int(x) for x in line]
-                # labels.append(line)
-                if box_counter == num_boxes:
-                    isBoxAnnotation = False
-                    isFile = True
-                    # print("read {} bounding boxes".format(box_counter))
-                    # self.words.append(labels.copy())
+                line = line.split(" ")
+                line = [int(x) for x in line]
+                labels.append(line)
+                if box_counter >= num_boxes:
+                    box_annotation_line = False
+                    file_name_line = True
+                    self.raw_annotations.append(torch.tensor(labels))
                     box_counter = 0
-                    # labels.clear()
+                    labels.clear()
             else:
-                print("ERROR parsing annotations file")
-
-        # isFirst = True
-        # labels = []
-        # for line in lines:
-        #     line = line.rstrip()
-        #     if line.startswith("#"):
-        #         if isFirst is True:
-        #             isFirst = False
-        #         else:
-        #             labels_copy = labels.copy()
-        #             self.words.append(labels_copy)
-        #             labels.clear()
-        #         path = line[2:]
-        #         path = ann_file.replace("label.txt","images/") + path
-        #         self.imgs_path.append(path)
-        #     else:
-        #         line = line.split(" ")
-        #         label = [float(x) for x in line]
-        #         labels.append(label)
-        # self.words.append(labels)
-
-
-        # if self.train:
-        #     data_file = self.training_file
-        # else:
-        #     data_file = self.test_file
-        # self.data, self.targets = torch.load(os.path.join(self.processed_folder, data_file))
+                raise RuntimeError("ERROR parsing annotations file {}".format(filepath))
+        f.close()
     
+    def parse_test_annotations_file(self):
+        filepath = os.path.join(self.root, self.base_folder, "wider_face_split", "wider_face_test_filelist.txt")
+        f = open(filepath, "r")
+        lines = f.readlines()
+        for line in lines:
+            line = line.rstrip()
+            abs_path = os.path.join(self.root, self.base_folder, "WIDER_test", "images", line)
+            self.imgs_path.append(abs_path)
+        f.close()
+
     def __getitem__(self, index: int) -> Tuple[Any, Any]:
         """
         Args:
             index (int): Index
 
         Returns:
-            tuple: (image, target) where target is index of the target class.
+            tuple: (image, target) where target=None for the test split.
         """
-        # img, target = self.data[index], int(self.targets[index])
-
-        # # doing this so that it is consistent with all other datasets
-        # # to return a PIL Image
-        # img = Image.fromarray(img.numpy(), mode='L')
 
-        # if self.transform is not None:
-        #     img = self.transform(img)
+        # stay consistent with all other datasets and return a PIL Image
+        img = Image.open(self.imgs_path[index])
 
-        # if self.target_transform is not None:
-        #     target = self.target_transform(target)
+        if self.transform is not None:
+            img = self.transform(img)
+        
+        if self.split == "test":
+            return img, None
+
+        # prepare target in the train/val split
+        target: Any = []
+        for t in self.target_type:
+            if t == "raw":
+                target.append( self.raw_annotations[index] )
+            elif t == "bbox":
+                target.append( self.raw_annotations[index][:,:4] )
+            elif t == "attr":
+                target.append( self.raw_annotations[index][:,4:] )
+            elif t == "":
+                target = None
+                break
+            else:
+                raise ValueError("Target type \"{}\" is not recognized.".format(t))
+        if target:
+            target = tuple(target) if len(target) > 1 else target[0]
+            if self.target_transform is not None:
+                target = self.target_transform(target)
+        
+        return img, target
 
-        # return img, target
-        return 0, 1
 
     def __len__(self) -> int:
         return len(self.imgs_path)
@@ -227,3 +237,7 @@ def download(self) -> None:
                                      extract_root=os.path.join(self.root, self.base_folder),
                                      filename=self.annotations_file[2],
                                      md5=self.annotations_file[1])
+
+    def extra_repr(self) -> str:
+        lines = ["Target type: {target_type}", "Split: {split}"]
+        return '\n'.join(lines).format(**self.__dict__)

From 1fbd0b72aea3eeae745ac7f3a6a5ff66f40da208 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 22 Oct 2020 19:10:54 -0400
Subject: [PATCH 05/44] speed up check for quota exceeded

---
 torchvision/datasets/utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
index 6b705f875ba..0f7cdbde95b 100644
--- a/torchvision/datasets/utils.py
+++ b/torchvision/datasets/utils.py
@@ -120,11 +120,11 @@ def list_files(root: str, suffix: str, prefix: bool = False) -> List[str]:
 
 
 def _quota_exceeded(response: "requests.models.Response") -> bool:  # type: ignore[name-defined]
-    return "Google Drive - Quota exceeded" in response.text
+    return (response.status_code == 403) and ("Google Drive - Quota exceeded" in response.text)
 
 
 def download_file_from_google_drive(file_id: str, root: str, filename: Optional[str] = None, md5: Optional[str] = None):
-    """Download a Google Drive file from  and place it in root.
+    """Download a Google Drive file and place it in root.
 
     Args:
         file_id (str): id of file to be downloaded
@@ -147,7 +147,6 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
         print('Using downloaded and verified file: ' + fpath)
     else:
         session = requests.Session()
-
         response = session.get(url, params={'id': file_id}, stream=True)
         token = _get_confirm_token(response)
 
@@ -162,7 +161,6 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
                 f"and can only be overcome by trying again later."
             )
             raise RuntimeError(msg)
-
         _save_response_content(response, fpath)
 
 

From 75c620d157d7e779460d0022387f629154a67241 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 22 Oct 2020 19:25:50 -0400
Subject: [PATCH 06/44] cleanup print statements

---
 torchvision/datasets/widerface.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index d6662886c56..d808865face 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -84,6 +84,7 @@ def __init__(
             self.target_type = target_type
         else:
             self.target_type = [target_type]
+        
         if not (all(x in ["raw","bbox","attr",""] for x in self.target_type)):
             raise ValueError("target_type \"{}\" is not recognized.".format(self.target_type))
         if not self.target_type and self.target_transform is not None:
@@ -164,12 +165,12 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
             tuple: (image, target) where target=None for the test split.
         """
 
-        # stay consistent with all other datasets and return a PIL Image
+        # stay consistent with other datasets and return a PIL Image
         img = Image.open(self.imgs_path[index])
 
         if self.transform is not None:
             img = self.transform(img)
-        
+
         if self.split == "test":
             return img, None
 
@@ -191,14 +192,14 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
             target = tuple(target) if len(target) > 1 else target[0]
             if self.target_transform is not None:
                 target = self.target_transform(target)
-        
+
         return img, target
 
 
     def __len__(self) -> int:
         return len(self.imgs_path)
 
-    # TODO - checking integrity of the annotations_file is not working
+
     def _check_integrity(self) -> bool:
         all_files = self.file_list.copy()
         all_files.append(self.annotations_file)
@@ -208,7 +209,6 @@ def _check_integrity(self) -> bool:
             _, ext = os.path.splitext(filename)
             # Allow original archive to be deleted (zip and 7z)
             # Only need the extracted images
-            print("filename: " + fpath)
             if ext not in [".zip", ".7z"] and not check_integrity(fpath, md5):
                 return False
 
@@ -218,9 +218,9 @@ def _check_integrity(self) -> bool:
     def download(self) -> None:
         import zipfile
 
-        # if self._check_integrity():
-        #     print('Files already downloaded and verified')
-        #     return
+        if self._check_integrity():
+            print('Files already downloaded and verified')
+            return
 
         # download data
         for (file_id, md5, filename) in self.file_list:
@@ -231,7 +231,7 @@ def download(self) -> None:
             with zipfile.ZipFile(os.path.join(self.root, self.base_folder, filename), "r") as f:
                 f.extractall(os.path.join(self.root, self.base_folder))
 
-        # download and extract annotations files
+        # download and extract annotation files
         download_and_extract_archive(url=self.annotations_file[0],
                                      download_root=os.path.join(self.root, self.base_folder),
                                      extract_root=os.path.join(self.root, self.base_folder),

From a82d7b5a27adcb8623187b23e6f59413eeffea9e Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Fri, 23 Oct 2020 00:24:56 -0400
Subject: [PATCH 07/44] reformat code and remove print statements

---
 torchvision/datasets/widerface.py | 124 +++++++++++++++---------------
 1 file changed, 63 insertions(+), 61 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index d808865face..7f4fb7afd33 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -109,6 +109,52 @@ def __init__(
         else:
             raise ValueError("split \"{}\" is not recognized.".format(self.split))
 
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, target) where target=None for the test split.
+        """
+
+        # stay consistent with other datasets and return a PIL Image
+        img = Image.open(self.imgs_path[index])
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        if self.split == "test":
+            return img, None
+
+        # prepare target in the train/val split
+        target: Any = []
+        for t in self.target_type:
+            if t == "raw":
+                target.append( self.raw_annotations[index] )
+            elif t == "bbox":
+                target.append( self.raw_annotations[index][:,:4] )
+            elif t == "attr":
+                target.append( self.raw_annotations[index][:,4:] )
+            elif t == "":
+                target = None
+                break
+            else:
+                raise ValueError("Target type \"{}\" is not recognized.".format(t))
+        if target:
+            target = tuple(target) if len(target) > 1 else target[0]
+            if self.target_transform is not None:
+                target = self.target_transform(target)
+
+        return img, target
+
+    def __len__(self) -> int:
+        return len(self.imgs_path)
+
+    def extra_repr(self) -> str:
+        lines = ["Target type: {target_type}", "Split: {split}"]
+        return '\n'.join(lines).format(**self.__dict__)
+
     def parse_train_val_annotations_file(self):
         filename = "wider_face_train_bbx_gt.txt" if self.split == "train" else "wider_face_val_bbx_gt.txt"
         filepath = os.path.join(self.root, self.base_folder, "wider_face_split", filename)
@@ -122,7 +168,6 @@ def parse_train_val_annotations_file(self):
         for line in lines:
             line = line.rstrip()
             if file_name_line:
-                # print(line)
                 abs_path = os.path.join(self.root, self.base_folder, "WIDER_"+self.split, "images", line)
                 self.imgs_path.append(abs_path)
                 file_name_line = False
@@ -143,7 +188,7 @@ def parse_train_val_annotations_file(self):
                     box_counter = 0
                     labels.clear()
             else:
-                raise RuntimeError("ERROR parsing annotations file {}".format(filepath))
+                raise RuntimeError("Error parsing annotation file {}".format(filepath))
         f.close()
     
     def parse_test_annotations_file(self):
@@ -156,64 +201,20 @@ def parse_test_annotations_file(self):
             self.imgs_path.append(abs_path)
         f.close()
 
-    def __getitem__(self, index: int) -> Tuple[Any, Any]:
-        """
-        Args:
-            index (int): Index
-
-        Returns:
-            tuple: (image, target) where target=None for the test split.
-        """
-
-        # stay consistent with other datasets and return a PIL Image
-        img = Image.open(self.imgs_path[index])
-
-        if self.transform is not None:
-            img = self.transform(img)
-
-        if self.split == "test":
-            return img, None
-
-        # prepare target in the train/val split
-        target: Any = []
-        for t in self.target_type:
-            if t == "raw":
-                target.append( self.raw_annotations[index] )
-            elif t == "bbox":
-                target.append( self.raw_annotations[index][:,:4] )
-            elif t == "attr":
-                target.append( self.raw_annotations[index][:,4:] )
-            elif t == "":
-                target = None
-                break
-            else:
-                raise ValueError("Target type \"{}\" is not recognized.".format(t))
-        if target:
-            target = tuple(target) if len(target) > 1 else target[0]
-            if self.target_transform is not None:
-                target = self.target_transform(target)
-
-        return img, target
-
-
-    def __len__(self) -> int:
-        return len(self.imgs_path)
-
-
     def _check_integrity(self) -> bool:
         all_files = self.file_list.copy()
         all_files.append(self.annotations_file)
-
         for (_, md5, filename) in all_files:
             fpath = os.path.join(self.root, self.base_folder, filename)
-            _, ext = os.path.splitext(filename)
-            # Allow original archive to be deleted (zip and 7z)
-            # Only need the extracted images
-            if ext not in [".zip", ".7z"] and not check_integrity(fpath, md5):
+            file, ext = os.path.splitext(filename)
+            # Allow original archive to be deleted (zip). Only need the extracted images
+            # Should check a hash of the images
+            extracted_dir = os.path.join(self.root, self.base_folder, file)
+            if ext != ".zip" and not check_integrity(fpath, md5):
                 return False
-
-        # Should check a hash of the images
-        return os.path.isdir(os.path.join(self.root, self.base_folder, "WIDER_train"))
+            if not os.path.isdir(extracted_dir):
+                return False
+        return True
 
     def download(self) -> None:
         import zipfile
@@ -226,10 +227,15 @@ def download(self) -> None:
         for (file_id, md5, filename) in self.file_list:
             download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5)
 
-        # extract data
+        # extract data if it doesn't exist
         for (file_id, md5, filename) in self.file_list:
-            with zipfile.ZipFile(os.path.join(self.root, self.base_folder, filename), "r") as f:
-                f.extractall(os.path.join(self.root, self.base_folder))
+            file, _ = os.path.splitext(filename)
+            extracted_dir = os.path.join(self.root, self.base_folder, file)
+            if not os.path.isdir(extracted_dir):
+                zip_file = os.path.join(self.root, self.base_folder, filename)
+                with zipfile.ZipFile(zip_file, "r") as f:
+                    new_extracted_dir = os.path.join(self.root, self.base_folder)
+                    f.extractall(new_extracted_dir)
 
         # download and extract annotation files
         download_and_extract_archive(url=self.annotations_file[0],
@@ -237,7 +243,3 @@ def download(self) -> None:
                                      extract_root=os.path.join(self.root, self.base_folder),
                                      filename=self.annotations_file[2],
                                      md5=self.annotations_file[1])
-
-    def extra_repr(self) -> str:
-        lines = ["Target type: {target_type}", "Split: {split}"]
-        return '\n'.join(lines).format(**self.__dict__)

From bba0db267cf6c6b64345e4615361d5e416e1b156 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Fri, 23 Oct 2020 00:43:57 -0400
Subject: [PATCH 08/44] minor code cleanup and reformatting

---
 torchvision/datasets/widerface.py | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 7f4fb7afd33..dbf0dd13844 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -1,16 +1,9 @@
-from .vision import VisionDataset
-import warnings
 from PIL import Image
 import os
-import os.path
-import numpy as np
 import torch
-import codecs
-import string
-import gzip
-import lzma
-from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union
+from typing import Any, Callable, List, Optional, Tuple, Union
 from .utils import download_file_from_google_drive, download_and_extract_archive, check_integrity
+from .vision import VisionDataset
 
 
 class WIDERFace(VisionDataset):
@@ -73,8 +66,6 @@ def __init__(
     ) -> None:
         super(WIDERFace, self).__init__(root, transform=transform,
                                         target_transform=target_transform)
-        print("root dir: " + self.root)
-        
         # check arguments
         if split not in ("train","val","test"):
             raise ValueError("split \"{}\" is not recognized.".format(split))
@@ -100,7 +91,7 @@ def __init__(
         if not self._check_integrity():
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download it")
-        
+
         # process dataset
         if self.split in ("train","val"):
             self.parse_train_val_annotations_file()
@@ -155,7 +146,7 @@ def extra_repr(self) -> str:
         lines = ["Target type: {target_type}", "Split: {split}"]
         return '\n'.join(lines).format(**self.__dict__)
 
-    def parse_train_val_annotations_file(self):
+    def parse_train_val_annotations_file(self) -> None:
         filename = "wider_face_train_bbx_gt.txt" if self.split == "train" else "wider_face_val_bbx_gt.txt"
         filepath = os.path.join(self.root, self.base_folder, "wider_face_split", filename)
 
@@ -190,8 +181,8 @@ def parse_train_val_annotations_file(self):
             else:
                 raise RuntimeError("Error parsing annotation file {}".format(filepath))
         f.close()
-    
-    def parse_test_annotations_file(self):
+
+    def parse_test_annotations_file(self) -> None:
         filepath = os.path.join(self.root, self.base_folder, "wider_face_split", "wider_face_test_filelist.txt")
         f = open(filepath, "r")
         lines = f.readlines()
@@ -225,7 +216,9 @@ def download(self) -> None:
 
         # download data
         for (file_id, md5, filename) in self.file_list:
-            download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5)
+            download_file_from_google_drive(file_id,
+                                            os.path.join(self.root, self.base_folder),
+                                            filename, md5)
 
         # extract data if it doesn't exist
         for (file_id, md5, filename) in self.file_list:

From 0c33f5fe5d89e56659ea5df3437d0e2907275d09 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Fri, 23 Oct 2020 01:14:08 -0400
Subject: [PATCH 09/44] add more comments

---
 torchvision/datasets/widerface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index dbf0dd13844..90d5036ede4 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -18,7 +18,7 @@ class WIDERFace(VisionDataset):
 
     Args:
         root (string): Root directory of the WIDERFace Dataset.
-            Expects the following structure:
+            Expects the following folder structure if download=False:
                 .
                 └── widerface
                     ├── wider_face_split.zip

From a7c0b30de24472b75d89ae82b94055108dffa1f2 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Fri, 23 Oct 2020 01:39:44 -0400
Subject: [PATCH 10/44] reuse variable

---
 torchvision/datasets/widerface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 90d5036ede4..ccc8cd8bf2b 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -227,8 +227,8 @@ def download(self) -> None:
             if not os.path.isdir(extracted_dir):
                 zip_file = os.path.join(self.root, self.base_folder, filename)
                 with zipfile.ZipFile(zip_file, "r") as f:
-                    new_extracted_dir = os.path.join(self.root, self.base_folder)
-                    f.extractall(new_extracted_dir)
+                    extracted_dir = os.path.join(self.root, self.base_folder)
+                    f.extractall(extracted_dir)
 
         # download and extract annotation files
         download_and_extract_archive(url=self.annotations_file[0],

From 40cde34133a066598d2f5517da427720cd05060b Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Fri, 23 Oct 2020 01:46:06 -0400
Subject: [PATCH 11/44] reverse formatting changes

---
 torchvision/datasets/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
index 0f7cdbde95b..3dd57130e83 100644
--- a/torchvision/datasets/utils.py
+++ b/torchvision/datasets/utils.py
@@ -147,6 +147,7 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
         print('Using downloaded and verified file: ' + fpath)
     else:
         session = requests.Session()
+
         response = session.get(url, params={'id': file_id}, stream=True)
         token = _get_confirm_token(response)
 
@@ -161,6 +162,7 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
                 f"and can only be overcome by trying again later."
             )
             raise RuntimeError(msg)
+
         _save_response_content(response, fpath)
 
 

From 48a620f865a9fd14d1444316780ddba0225217f6 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 26 Oct 2020 03:03:11 -0400
Subject: [PATCH 12/44] fix flake8 errors

---
 torchvision/datasets/widerface.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index ccc8cd8bf2b..6085d23c5b2 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -11,10 +11,10 @@ class WIDERFace(VisionDataset):
 
     Citation:
     @inproceedings{yang2016wider,
-	    Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
-	    Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-	    Title = {WIDER FACE: A Face Detection Benchmark},
-	    Year = {2016}}
+        Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
+        Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+        Title = {WIDER FACE: A Face Detection Benchmark},
+        Year = {2016}}
 
     Args:
         root (string): Root directory of the WIDERFace Dataset.
@@ -67,7 +67,7 @@ def __init__(
         super(WIDERFace, self).__init__(root, transform=transform,
                                         target_transform=target_transform)
         # check arguments
-        if split not in ("train","val","test"):
+        if split not in ("train", "val", "test"):
             raise ValueError("split \"{}\" is not recognized.".format(split))
         self.split = split
 
@@ -75,8 +75,8 @@ def __init__(
             self.target_type = target_type
         else:
             self.target_type = [target_type]
-        
-        if not (all(x in ["raw","bbox","attr",""] for x in self.target_type)):
+
+        if not (all(x in ["raw", "bbox", "attr", ""] for x in self.target_type)):
             raise ValueError("target_type \"{}\" is not recognized.".format(self.target_type))
         if not self.target_type and self.target_transform is not None:
             raise RuntimeError('target_transform is specified but target_type is empty')
@@ -87,13 +87,13 @@ def __init__(
 
         if download:
             self.download()
-        
+
         if not self._check_integrity():
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download it")
 
         # process dataset
-        if self.split in ("train","val"):
+        if self.split in ("train", "val"):
             self.parse_train_val_annotations_file()
         elif self.split == "test":
             self.parse_test_annotations_file()
@@ -122,11 +122,11 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
         target: Any = []
         for t in self.target_type:
             if t == "raw":
-                target.append( self.raw_annotations[index] )
+                target.append(self.raw_annotations[index])
             elif t == "bbox":
-                target.append( self.raw_annotations[index][:,:4] )
+                target.append(self.raw_annotations[index][:, :4])
             elif t == "attr":
-                target.append( self.raw_annotations[index][:,4:] )
+                target.append(self.raw_annotations[index][:, 4:])
             elif t == "":
                 target = None
                 break
@@ -159,7 +159,7 @@ def parse_train_val_annotations_file(self) -> None:
         for line in lines:
             line = line.rstrip()
             if file_name_line:
-                abs_path = os.path.join(self.root, self.base_folder, "WIDER_"+self.split, "images", line)
+                abs_path = os.path.join(self.root, self.base_folder, "WIDER_" + self.split, "images", line)
                 self.imgs_path.append(abs_path)
                 file_name_line = False
                 num_boxes_line = True

From bc8c35b63156b0d17671bf456bab7593bd92e48b Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 26 Oct 2020 03:23:03 -0400
Subject: [PATCH 13/44] add type annotations

---
 torchvision/datasets/widerface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 6085d23c5b2..ac2fc3e7eeb 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -82,8 +82,8 @@ def __init__(
             raise RuntimeError('target_transform is specified but target_type is empty')
 
         # prepare dataset
-        self.imgs_path = []
-        self.raw_annotations = []
+        self.imgs_path: List[str] = []
+        self.raw_annotations: List[torch.Tensor] = []
 
         if download:
             self.download()

From e0b866482a0e9fd1aa8d0aef3fabbb476ad33faf Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbrad3@evoforge.org>
Date: Mon, 26 Oct 2020 18:48:40 +0000
Subject: [PATCH 14/44] fix mypy errors

---
 torchvision/datasets/widerface.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index ac2fc3e7eeb..10a93d5cdf9 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -169,9 +169,9 @@ def parse_train_val_annotations_file(self) -> None:
                 box_annotation_line = True
             elif box_annotation_line:
                 box_counter += 1
-                line = line.split(" ")
-                line = [int(x) for x in line]
-                labels.append(line)
+                line_split = line.split(" ")
+                line_values = [int(x) for x in line_split]
+                labels.append(line_values)
                 if box_counter >= num_boxes:
                     box_annotation_line = False
                     file_name_line = True

From 2e73130062558b3fbcabe1b1b62f71249b718bcf Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 26 Oct 2020 18:43:08 -0400
Subject: [PATCH 15/44] add a base_folder to root directory

---
 torchvision/datasets/widerface.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 10a93d5cdf9..6d6eba8b9fa 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -64,7 +64,8 @@ def __init__(
             target_transform: Optional[Callable] = None,
             download: bool = False,
     ) -> None:
-        super(WIDERFace, self).__init__(root, transform=transform,
+        super(WIDERFace, self).__init__(root=os.path.join(root, self.base_folder),
+                                        transform=transform,
                                         target_transform=target_transform)
         # check arguments
         if split not in ("train", "val", "test"):
@@ -148,7 +149,7 @@ def extra_repr(self) -> str:
 
     def parse_train_val_annotations_file(self) -> None:
         filename = "wider_face_train_bbx_gt.txt" if self.split == "train" else "wider_face_val_bbx_gt.txt"
-        filepath = os.path.join(self.root, self.base_folder, "wider_face_split", filename)
+        filepath = os.path.join(self.root, "wider_face_split", filename)
 
         f = open(filepath, "r")
         lines = f.readlines()
@@ -159,7 +160,7 @@ def parse_train_val_annotations_file(self) -> None:
         for line in lines:
             line = line.rstrip()
             if file_name_line:
-                abs_path = os.path.join(self.root, self.base_folder, "WIDER_" + self.split, "images", line)
+                abs_path = os.path.join(self.root, "WIDER_" + self.split, "images", line)
                 self.imgs_path.append(abs_path)
                 file_name_line = False
                 num_boxes_line = True
@@ -183,12 +184,12 @@ def parse_train_val_annotations_file(self) -> None:
         f.close()
 
     def parse_test_annotations_file(self) -> None:
-        filepath = os.path.join(self.root, self.base_folder, "wider_face_split", "wider_face_test_filelist.txt")
+        filepath = os.path.join(self.root, "wider_face_split", "wider_face_test_filelist.txt")
         f = open(filepath, "r")
         lines = f.readlines()
         for line in lines:
             line = line.rstrip()
-            abs_path = os.path.join(self.root, self.base_folder, "WIDER_test", "images", line)
+            abs_path = os.path.join(self.root, "WIDER_test", "images", line)
             self.imgs_path.append(abs_path)
         f.close()
 
@@ -196,11 +197,11 @@ def _check_integrity(self) -> bool:
         all_files = self.file_list.copy()
         all_files.append(self.annotations_file)
         for (_, md5, filename) in all_files:
-            fpath = os.path.join(self.root, self.base_folder, filename)
+            fpath = os.path.join(self.root, filename)
             file, ext = os.path.splitext(filename)
             # Allow original archive to be deleted (zip). Only need the extracted images
             # Should check a hash of the images
-            extracted_dir = os.path.join(self.root, self.base_folder, file)
+            extracted_dir = os.path.join(self.root, file)
             if ext != ".zip" and not check_integrity(fpath, md5):
                 return False
             if not os.path.isdir(extracted_dir):
@@ -217,22 +218,21 @@ def download(self) -> None:
         # download data
         for (file_id, md5, filename) in self.file_list:
             download_file_from_google_drive(file_id,
-                                            os.path.join(self.root, self.base_folder),
+                                            self.root,
                                             filename, md5)
 
         # extract data if it doesn't exist
         for (file_id, md5, filename) in self.file_list:
             file, _ = os.path.splitext(filename)
-            extracted_dir = os.path.join(self.root, self.base_folder, file)
+            extracted_dir = os.path.join(self.root, file)
             if not os.path.isdir(extracted_dir):
-                zip_file = os.path.join(self.root, self.base_folder, filename)
+                zip_file = os.path.join(self.root, filename)
                 with zipfile.ZipFile(zip_file, "r") as f:
-                    extracted_dir = os.path.join(self.root, self.base_folder)
-                    f.extractall(extracted_dir)
+                    f.extractall(self.root)
 
         # download and extract annotation files
         download_and_extract_archive(url=self.annotations_file[0],
-                                     download_root=os.path.join(self.root, self.base_folder),
-                                     extract_root=os.path.join(self.root, self.base_folder),
+                                     download_root=self.root,
+                                     extract_root=self.root,
                                      filename=self.annotations_file[2],
                                      md5=self.annotations_file[1])

From 4de06aa0530c9453601993a3772380501e213bad Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Tue, 27 Oct 2020 13:16:01 -0400
Subject: [PATCH 16/44] some formatting fixes

---
 torchvision/datasets/widerface.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 6d6eba8b9fa..c004fd6a4e5 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -11,13 +11,13 @@ class WIDERFace(VisionDataset):
 
     Citation:
     @inproceedings{yang2016wider,
-        Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
-        Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-        Title = {WIDER FACE: A Face Detection Benchmark},
-        Year = {2016}}
+        author    = "Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou",
+        booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)",
+        title     = "WIDER FACE: A Face Detection Benchmark",
+        year      = "2016"}
 
     Args:
-        root (string): Root directory of the WIDERFace Dataset.
+        root (string): Root directory where images and annotations are downloaded to.
             Expects the following folder structure if download=False:
                 .
                 └── widerface
@@ -25,10 +25,11 @@ class WIDERFace(VisionDataset):
                     ├── WIDER_test.zip
                     ├── WIDER_train.zip
                     └── WIDER_val.zip
-        split (string): One of {``train``, ``val``, ``test``}.
-            The dataset split to use. Defaults to ``train``.
-        target_type (string): The type of target to use, can be one of {``raw``, ``bbox``, ``attr``.``""``}
-            Can also be a list to output a tuple with all specified target types.
+        split (string): The dataset split to use. One of {``train``, ``val``, ``test``}.
+            Defaults to ``train``.
+        target_type (string): The type of target to use, can be one
+            of {``raw``, ``bbox``, ``attr``.``""``}. Can also be a list to
+            output a tuple with all specified target types.
             The targets represent:
                 ``raw`` (torch.tensor shape=(10,) dtype=int): all annotations combined (bbox + attr)
                 ``bbox`` (torch.tensor shape=(4,) dtype=int): bounding box (x, y, width, height)

From 70dc752c69ebd885ab6012b94bed92350eff214c Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Wed, 28 Oct 2020 22:01:28 -0400
Subject: [PATCH 17/44] GDrive quota-exceeded response is not always a 403 error

---
 torchvision/datasets/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
index 3dd57130e83..e6385bc1cd6 100644
--- a/torchvision/datasets/utils.py
+++ b/torchvision/datasets/utils.py
@@ -120,7 +120,7 @@ def list_files(root: str, suffix: str, prefix: bool = False) -> List[str]:
 
 
 def _quota_exceeded(response: "requests.models.Response") -> bool:  # type: ignore[name-defined]
-    return (response.status_code == 403) and ("Google Drive - Quota exceeded" in response.text)
+    return "Google Drive - Quota exceeded" in response.text
 
 
 def download_file_from_google_drive(file_id: str, root: str, filename: Optional[str] = None, md5: Optional[str] = None):

From 4d2506f40508180616f45a7ef884db950823abc5 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 29 Oct 2020 13:31:10 -0400
Subject: [PATCH 18/44] testing new download logic

---
 torchvision/datasets/widerface.py | 82 ++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 29 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index c004fd6a4e5..65fe5999aae 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -2,7 +2,7 @@
 import os
 import torch
 from typing import Any, Callable, List, Optional, Tuple, Union
-from .utils import download_file_from_google_drive, download_and_extract_archive, check_integrity
+from .utils import download_file_from_google_drive, download_and_extract_archive, check_integrity, download_url, extract_archive
 from .vision import VisionDataset
 
 
@@ -69,6 +69,7 @@ def __init__(
                                         transform=transform,
                                         target_transform=target_transform)
         # check arguments
+        print("ROOT: " + self.root)
         if split not in ("train", "val", "test"):
             raise ValueError("split \"{}\" is not recognized.".format(split))
         self.split = split
@@ -83,17 +84,19 @@ def __init__(
         if not self.target_type and self.target_transform is not None:
             raise RuntimeError('target_transform is specified but target_type is empty')
 
-        # prepare dataset
-        self.imgs_path: List[str] = []
-        self.raw_annotations: List[torch.Tensor] = []
-
         if download:
             self.download()
 
+        # self._extract_dataset()
+
         if not self._check_integrity():
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download it")
 
+        # prepare dataset
+        self.imgs_path: List[str] = []
+        self.raw_annotations: List[torch.Tensor] = []
+
         # process dataset
         if self.split in ("train", "val"):
             self.parse_train_val_annotations_file()
@@ -194,46 +197,67 @@ def parse_test_annotations_file(self) -> None:
             self.imgs_path.append(abs_path)
         f.close()
 
+    # def _extract_dataset(self) -> None:
+    #     print("EXTRACT_DATASET")
+    #     all_files = self.file_list.copy()
+    #     all_files.append(self.annotations_file)
+    #     for (_, md5, filename) in all_files:
+    #         fpath = os.path.join(self.root, filename)
+    #         file, ext = os.path.splitext(filename)
+    #         # Allow original archive to be deleted (zip). Only need the extracted images
+    #         extracted_dir = os.path.join(self.root, file)
+    #         print("extracted_dir: " + extracted_dir)
+    #         print("fpath: " + fpath)
+    #         print("root: " + self.root)
+    #         if not os.path.isdir(extracted_dir):
+    #             zip_file = os.path.join(self.root, filename)
+    #             extract_archive(zip_file)
+
     def _check_integrity(self) -> bool:
+        print("CHECK_INTEGRITY")
         all_files = self.file_list.copy()
         all_files.append(self.annotations_file)
         for (_, md5, filename) in all_files:
             fpath = os.path.join(self.root, filename)
             file, ext = os.path.splitext(filename)
+            if os.path.exists(fpath) and not check_integrity(fpath, md5):
+                return False
             # Allow original archive to be deleted (zip). Only need the extracted images
-            # Should check a hash of the images
+            # TODO problem case when !directory.exist and file.exists()
             extracted_dir = os.path.join(self.root, file)
-            if ext != ".zip" and not check_integrity(fpath, md5):
-                return False
-            if not os.path.isdir(extracted_dir):
-                return False
+            print("extracted_dir: " + extracted_dir)
+            print("fpath: " + fpath)
+            print("root: " + self.root)
+            if os.path.exists(fpath) and not os.path.isdir(extracted_dir):
+                print("extracting file {}".format(filename))
+                zip_file = os.path.join(self.root, filename)
+                extract_archive(zip_file)
         return True
 
     def download(self) -> None:
-        import zipfile
-
         if self._check_integrity():
             print('Files already downloaded and verified')
             return
 
-        # download data
-        for (file_id, md5, filename) in self.file_list:
-            download_file_from_google_drive(file_id,
-                                            self.root,
-                                            filename, md5)
-
-        # extract data if it doesn't exist
+        # download data if the extracted data doesn't exist
         for (file_id, md5, filename) in self.file_list:
             file, _ = os.path.splitext(filename)
             extracted_dir = os.path.join(self.root, file)
             if not os.path.isdir(extracted_dir):
-                zip_file = os.path.join(self.root, filename)
-                with zipfile.ZipFile(zip_file, "r") as f:
-                    f.extractall(self.root)
-
-        # download and extract annotation files
-        download_and_extract_archive(url=self.annotations_file[0],
-                                     download_root=self.root,
-                                     extract_root=self.root,
-                                     filename=self.annotations_file[2],
-                                     md5=self.annotations_file[1])
+                download_file_from_google_drive(file_id, self.root, filename, md5)
+        # download annotation files
+        extracted_dir, _ = os.path.splitext(self.annotations_file[2])
+        if not os.path.isdir(extracted_dir):
+            download_url(url=self.annotations_file[0], root=self.root, md5=self.annotations_file[1])
+
+        # # extract data if necessary
+        # all_files = self.file_list.copy()
+        # all_files.append(self.annotations_file)
+        # for (_, md5, filename) in all_files:
+        #     file, ext = os.path.splitext(filename)
+        #     # Allow original archive to be deleted (zip). Only need the extracted images
+        #     extracted_dir = os.path.join(self.root, file)
+        #     print("download - extracted_dir: " + extracted_dir)
+        #     if not os.path.isdir(extracted_dir):
+        #         zip_file = os.path.join(self.root, filename)
+        #         extract_archive(zip_file)

From 6f76fd73512674e5014407039b0f2a2ab85ca3d8 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 29 Oct 2020 14:06:08 -0400
Subject: [PATCH 19/44] cleanup logic for download and integrity check

---
 torchvision/datasets/widerface.py | 59 +++++++------------------------
 1 file changed, 12 insertions(+), 47 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 65fe5999aae..16b2de31192 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -2,7 +2,7 @@
 import os
 import torch
 from typing import Any, Callable, List, Optional, Tuple, Union
-from .utils import download_file_from_google_drive, download_and_extract_archive, check_integrity, download_url, extract_archive
+from .utils import download_file_from_google_drive, check_integrity, download_url, extract_archive
 from .vision import VisionDataset
 
 
@@ -69,7 +69,6 @@ def __init__(
                                         transform=transform,
                                         target_transform=target_transform)
         # check arguments
-        print("ROOT: " + self.root)
         if split not in ("train", "val", "test"):
             raise ValueError("split \"{}\" is not recognized.".format(split))
         self.split = split
@@ -87,8 +86,6 @@ def __init__(
         if download:
             self.download()
 
-        # self._extract_dataset()
-
         if not self._check_integrity():
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download it")
@@ -197,41 +194,19 @@ def parse_test_annotations_file(self) -> None:
             self.imgs_path.append(abs_path)
         f.close()
 
-    # def _extract_dataset(self) -> None:
-    #     print("EXTRACT_DATASET")
-    #     all_files = self.file_list.copy()
-    #     all_files.append(self.annotations_file)
-    #     for (_, md5, filename) in all_files:
-    #         fpath = os.path.join(self.root, filename)
-    #         file, ext = os.path.splitext(filename)
-    #         # Allow original archive to be deleted (zip). Only need the extracted images
-    #         extracted_dir = os.path.join(self.root, file)
-    #         print("extracted_dir: " + extracted_dir)
-    #         print("fpath: " + fpath)
-    #         print("root: " + self.root)
-    #         if not os.path.isdir(extracted_dir):
-    #             zip_file = os.path.join(self.root, filename)
-    #             extract_archive(zip_file)
-
     def _check_integrity(self) -> bool:
-        print("CHECK_INTEGRITY")
+        # Allow original archive to be deleted (zip). Only need the extracted images
         all_files = self.file_list.copy()
         all_files.append(self.annotations_file)
         for (_, md5, filename) in all_files:
-            fpath = os.path.join(self.root, filename)
             file, ext = os.path.splitext(filename)
-            if os.path.exists(fpath) and not check_integrity(fpath, md5):
-                return False
-            # Allow original archive to be deleted (zip). Only need the extracted images
-            # TODO problem case when !directory.exist and file.exists()
             extracted_dir = os.path.join(self.root, file)
-            print("extracted_dir: " + extracted_dir)
-            print("fpath: " + fpath)
-            print("root: " + self.root)
-            if os.path.exists(fpath) and not os.path.isdir(extracted_dir):
-                print("extracting file {}".format(filename))
-                zip_file = os.path.join(self.root, filename)
-                extract_archive(zip_file)
+            if os.path.exists(extracted_dir):
+                continue
+            filepath = os.path.join(self.root, filename)
+            if not check_integrity(filepath, md5):
+                return False
+            extract_archive(filepath)
         return True
 
     def download(self) -> None:
@@ -243,21 +218,11 @@ def download(self) -> None:
         for (file_id, md5, filename) in self.file_list:
             file, _ = os.path.splitext(filename)
             extracted_dir = os.path.join(self.root, file)
-            if not os.path.isdir(extracted_dir):
-                download_file_from_google_drive(file_id, self.root, filename, md5)
+            if os.path.isdir(extracted_dir):
+                continue
+            download_file_from_google_drive(file_id, self.root, filename, md5)
+
         # download annotation files
         extracted_dir, _ = os.path.splitext(self.annotations_file[2])
         if not os.path.isdir(extracted_dir):
             download_url(url=self.annotations_file[0], root=self.root, md5=self.annotations_file[1])
-
-        # # extract data if necessary
-        # all_files = self.file_list.copy()
-        # all_files.append(self.annotations_file)
-        # for (_, md5, filename) in all_files:
-        #     file, ext = os.path.splitext(filename)
-        #     # Allow original archive to be deleted (zip). Only need the extracted images
-        #     extracted_dir = os.path.join(self.root, file)
-        #     print("download - extracted_dir: " + extracted_dir)
-        #     if not os.path.isdir(extracted_dir):
-        #         zip_file = os.path.join(self.root, filename)
-        #         extract_archive(zip_file)

From 9c6d02cb56b0f58b8e1647f9e7ed8c50cc877093 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 29 Oct 2020 16:57:14 -0400
Subject: [PATCH 20/44] use a better variable name

---
 torchvision/datasets/widerface.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 16b2de31192..164664be4dd 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -161,8 +161,8 @@ def parse_train_val_annotations_file(self) -> None:
         for line in lines:
             line = line.rstrip()
             if file_name_line:
-                abs_path = os.path.join(self.root, "WIDER_" + self.split, "images", line)
-                self.imgs_path.append(abs_path)
+                img_path = os.path.join(self.root, "WIDER_" + self.split, "images", line)
+                self.imgs_path.append(img_path)
                 file_name_line = False
                 num_boxes_line = True
             elif num_boxes_line:
@@ -190,8 +190,8 @@ def parse_test_annotations_file(self) -> None:
         lines = f.readlines()
         for line in lines:
             line = line.rstrip()
-            abs_path = os.path.join(self.root, "WIDER_test", "images", line)
-            self.imgs_path.append(abs_path)
+            img_path = os.path.join(self.root, "WIDER_test", "images", line)
+            self.imgs_path.append(img_path)
         f.close()
 
     def _check_integrity(self) -> bool:

From 2f76d945106bd72982305f1d5e4e102e2ce737eb Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Thu, 29 Oct 2020 16:58:53 -0400
Subject: [PATCH 21/44] format fix

---
 torchvision/datasets/widerface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 164664be4dd..6a13368d6ba 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -31,7 +31,7 @@ class WIDERFace(VisionDataset):
             of {``raw``, ``bbox``, ``attr``.``""``}. Can also be a list to
             output a tuple with all specified target types.
             The targets represent:
-                ``raw`` (torch.tensor shape=(10,) dtype=int): all annotations combined (bbox + attr)
+                ``raw``  (torch.tensor shape=(10,) dtype=int): all annotations combined (bbox + attr)
                 ``bbox`` (torch.tensor shape=(4,) dtype=int): bounding box (x, y, width, height)
                 ``attr`` (torch.tensor shape=(6,) dtype=int): label values for attributes
                     that represent (blur, expression, illumination, occlusion, pose, invalid)

From a7f021cddc7575f7ea69c0ef5d3850edbcadd387 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Sun, 1 Nov 2020 14:42:02 -0500
Subject: [PATCH 22/44] reorder list in docstring

---
 torchvision/datasets/widerface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 6a13368d6ba..64f847f0303 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -22,9 +22,9 @@ class WIDERFace(VisionDataset):
                 .
                 └── widerface
                     ├── wider_face_split.zip
-                    ├── WIDER_test.zip
                     ├── WIDER_train.zip
-                    └── WIDER_val.zip
+                    ├── WIDER_val.zip
+                    └── WIDER_test.zip
         split (string): The dataset split to use. One of {``train``, ``val``, ``test``}.
             Defaults to ``train``.
         target_type (string): The type of target to use, can be one

From 35b683417cdaeabe484a1fefcbf551dbd10e9571 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Sun, 1 Nov 2020 14:42:50 -0500
Subject: [PATCH 23/44] initial widerface unit test - fails on MD5 check

---
 test/fakedata_generation.py | 80 +++++++++++++++++++++++++++++++++++++
 test/test_datasets.py       |  7 +++-
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/test/fakedata_generation.py b/test/fakedata_generation.py
index e17a4309cdc..e5e245a5950 100644
--- a/test/fakedata_generation.py
+++ b/test/fakedata_generation.py
@@ -171,6 +171,86 @@ def _make_devkit_archive(root):
         yield root
 
 
+@contextlib.contextmanager
+def widerface_root():
+    """
+    <root>
+    └── widerface
+        ├── wider_face_split.zip ('wider_face_split' when uncompressed)
+        ├── WIDER_train.zip ('WIDER_train' when uncompressed)
+        ├── WIDER_val.zip ('WIDER_val' when uncompressed)
+        └── WIDER_test.zip ('WIDER_test' when uncompressed)
+
+    The dataset consist of
+      1 image for each dataset split (train, val, test)
+      annotation files for each split
+    """
+    import shutil
+
+    def _make_image(file):
+        PIL.Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8)).save(file)
+
+    def _make_train_archive(root):
+        with get_tmp_dir() as tmp:
+            extracted_dir = os.path.join(tmp, 'WIDER_train', 'images', '0--Parade')
+            os.makedirs(extracted_dir)
+            _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_1.jpg'))
+            zipped_file = os.path.join(root, 'WIDER_train')
+            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
+
+    def _make_val_archive(root):
+        with get_tmp_dir() as tmp:
+            extracted_dir = os.path.join(tmp, 'WIDER_val', 'images', '0--Parade')
+            os.makedirs(extracted_dir)
+            _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_2.jpg'))
+            zipped_file = os.path.join(root, 'WIDER_val')
+            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
+
+    def _make_test_archive(root):
+        with get_tmp_dir() as tmp:
+            extracted_dir = os.path.join(tmp, 'WIDER_test', 'images', '0--Parade')
+            os.makedirs(extracted_dir)
+            _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_3.jpg'))
+            zipped_file = os.path.join(root, 'WIDER_test')
+            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
+
+    def _make_annotations_archive(root):
+        train_bbox_contents = '0--Parade/0_Parade_marchingband_1_1.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n'
+        val_bbox_contents = '0--Parade/0_Parade_marchingband_1_2.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n'
+        test_filelist_contents = '0--Parade/0_Parade_marchingband_1_3.jpg\n'
+
+        with get_tmp_dir() as tmp:
+            extracted_dir = os.path.join(tmp, 'wider_face_split')
+            os.makedirs(extracted_dir)
+
+            # bbox training file
+            bbox_file = os.path.join(extracted_dir, "wider_face_train_bbx_gt.txt")
+            with open(bbox_file, "w") as txt_file:
+                txt_file.write(train_bbox_contents)
+
+            # bbox validation file
+            bbox_file = os.path.join(extracted_dir, "wider_face_val_bbx_gt.txt")
+            with open(bbox_file, "w") as txt_file:
+                txt_file.write(val_bbox_contents)
+
+            # test filelist file
+            filelist_file = os.path.join(extracted_dir, "wider_face_test_filelist.txt")
+            with open(filelist_file, "w") as txt_file:
+                txt_file.write(test_filelist_contents)
+
+            # zip up all annotation files
+            zipped_file = os.path.join(root, 'wider_face_split')
+            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
+
+    with get_tmp_dir() as root:
+        _make_train_archive(root)
+        _make_val_archive(root)
+        _make_test_archive(root)
+        _make_annotations_archive(root)
+
+        yield root
+
+
 @contextlib.contextmanager
 def cityscapes_root():
 
diff --git a/test/test_datasets.py b/test/test_datasets.py
index af092e1845d..b2c0a95a3d5 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -9,7 +9,7 @@
 import torchvision
 from common_utils import get_tmp_dir
 from fakedata_generation import mnist_root, cifar_root, imagenet_root, \
-    cityscapes_root, svhn_root, voc_root, ucf101_root, places365_root
+    cityscapes_root, svhn_root, voc_root, ucf101_root, places365_root, widerface_root
 import xml.etree.ElementTree as ET
 from urllib.request import Request, urlopen
 import itertools
@@ -139,6 +139,11 @@ def test_imagenet(self, mock_verify):
             dataset = torchvision.datasets.ImageNet(root, split='val')
             self.generic_classification_dataset_test(dataset)
 
+    def test_widerface(self):
+        with widerface_root() as root:
+            dataset = torchvision.datasets.WIDERFace(root, split='train')
+            self.assertEqual(len(dataset), 1)
+
     @mock.patch('torchvision.datasets.cifar.check_integrity')
     @mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity')
     def test_cifar10(self, mock_ext_check, mock_int_check):

From f0f47c16f8ef6d6ded5379b89740fbf5911502bd Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Sun, 1 Nov 2020 15:51:46 -0500
Subject: [PATCH 24/44] use list of dictionaries to store dataset

---
 torchvision/datasets/widerface.py | 38 +++++++++++++++++--------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 64f847f0303..dd8957eda28 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -1,7 +1,8 @@
 from PIL import Image
 import os
+from os.path import abspath, expanduser
 import torch
-from typing import Any, Callable, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from .utils import download_file_from_google_drive, check_integrity, download_url, extract_archive
 from .vision import VisionDataset
 
@@ -73,6 +74,8 @@ def __init__(
             raise ValueError("split \"{}\" is not recognized.".format(split))
         self.split = split
 
+        if self.split == "test":
+            target_type = ""
         if isinstance(target_type, list):
             self.target_type = target_type
         else:
@@ -90,11 +93,8 @@ def __init__(
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download it")
 
-        # prepare dataset
-        self.imgs_path: List[str] = []
-        self.raw_annotations: List[torch.Tensor] = []
-
         # process dataset
+        self.img_info: List[Dict[str, object]] = []
         if self.split in ("train", "val"):
             self.parse_train_val_annotations_file()
         elif self.split == "test":
@@ -112,23 +112,22 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
         """
 
         # stay consistent with other datasets and return a PIL Image
-        img = Image.open(self.imgs_path[index])
+        img = Image.open(self.img_info[index]["img_path"])
 
         if self.transform is not None:
             img = self.transform(img)
 
-        if self.split == "test":
-            return img, None
-
-        # prepare target in the train/val split
+        # prepare target
         target: Any = []
         for t in self.target_type:
             if t == "raw":
-                target.append(self.raw_annotations[index])
+                target.append(self.img_info[index][t])
             elif t == "bbox":
-                target.append(self.raw_annotations[index][:, :4])
+                # bbox coordinates are the first 4 values in the raw annotation
+                target.append(self.img_info[index]["raw"][:, :4])
             elif t == "attr":
-                target.append(self.raw_annotations[index][:, 4:])
+                # attributes are defined after the bbox coordinates
+                target.append(self.img_info[index]["raw"][:, 4:])
             elif t == "":
                 target = None
                 break
@@ -142,7 +141,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
         return img, target
 
     def __len__(self) -> int:
-        return len(self.imgs_path)
+        return len(self.img_info)
 
     def extra_repr(self) -> str:
         lines = ["Target type: {target_type}", "Split: {split}"]
@@ -162,7 +161,7 @@ def parse_train_val_annotations_file(self) -> None:
             line = line.rstrip()
             if file_name_line:
                 img_path = os.path.join(self.root, "WIDER_" + self.split, "images", line)
-                self.imgs_path.append(img_path)
+                img_path = abspath(expanduser(img_path))
                 file_name_line = False
                 num_boxes_line = True
             elif num_boxes_line:
@@ -177,7 +176,10 @@ def parse_train_val_annotations_file(self) -> None:
                 if box_counter >= num_boxes:
                     box_annotation_line = False
                     file_name_line = True
-                    self.raw_annotations.append(torch.tensor(labels))
+                    self.img_info.append({
+                        "img_path": img_path,
+                        "raw": torch.tensor(labels),
+                    })
                     box_counter = 0
                     labels.clear()
             else:
@@ -186,12 +188,14 @@ def parse_train_val_annotations_file(self) -> None:
 
     def parse_test_annotations_file(self) -> None:
         filepath = os.path.join(self.root, "wider_face_split", "wider_face_test_filelist.txt")
+        filepath = abspath(expanduser(filepath))
         f = open(filepath, "r")
         lines = f.readlines()
         for line in lines:
             line = line.rstrip()
             img_path = os.path.join(self.root, "WIDER_test", "images", line)
-            self.imgs_path.append(img_path)
+            img_path = abspath(expanduser(img_path))
+            self.img_info.append({"img_path": img_path})
         f.close()
 
     def _check_integrity(self) -> bool:

From 463bde0fef86520c23304deffe49e5be54c10867 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Sun, 1 Nov 2020 15:54:34 -0500
Subject: [PATCH 25/44] fix docstring formatting

---
 torchvision/datasets/widerface.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index dd8957eda28..dfd1d8e9f9c 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -20,12 +20,12 @@ class WIDERFace(VisionDataset):
     Args:
         root (string): Root directory where images and annotations are downloaded to.
             Expects the following folder structure if download=False:
-                .
-                └── widerface
-                    ├── wider_face_split.zip
-                    ├── WIDER_train.zip
-                    ├── WIDER_val.zip
-                    └── WIDER_test.zip
+                <root>
+                    └── widerface
+                        ├── wider_face_split.zip
+                        ├── WIDER_train.zip
+                        ├── WIDER_val.zip
+                        └── WIDER_test.zip
         split (string): The dataset split to use. One of {``train``, ``val``, ``test``}.
             Defaults to ``train``.
         target_type (string): The type of target to use, can be one

From 6ef5379f3282b7d84b904a75161e5646808403b8 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Sun, 1 Nov 2020 15:57:40 -0500
Subject: [PATCH 26/44] remove unnecessary error checking

---
 torchvision/datasets/widerface.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index dfd1d8e9f9c..690f4fede73 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -97,10 +97,8 @@ def __init__(
         self.img_info: List[Dict[str, object]] = []
         if self.split in ("train", "val"):
             self.parse_train_val_annotations_file()
-        elif self.split == "test":
-            self.parse_test_annotations_file()
         else:
-            raise ValueError("split \"{}\" is not recognized.".format(self.split))
+            self.parse_test_annotations_file()
 
     def __getitem__(self, index: int) -> Tuple[Any, Any]:
         """

From e844078661f89332a80a7e4d2436317b5596cb66 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Sun, 1 Nov 2020 17:51:17 -0500
Subject: [PATCH 27/44] fix type checker error

---
 torchvision/datasets/widerface.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 690f4fede73..68309a4901e 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -94,7 +94,8 @@ def __init__(
                                "You can use download=True to download it")
 
         # process dataset
-        self.img_info: List[Dict[str, object]] = []
+        # dataset will be stored as a list of dict objects (img_info)
+        self.img_info: Any = []
         if self.split in ("train", "val"):
             self.parse_train_val_annotations_file()
         else:

From da96b84ad737a011e12a2d5162c5b9c53338efaa Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 18:26:41 -0500
Subject: [PATCH 28/44] revert typo fix

---
 torchvision/datasets/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
index e6385bc1cd6..6b705f875ba 100644
--- a/torchvision/datasets/utils.py
+++ b/torchvision/datasets/utils.py
@@ -124,7 +124,7 @@ def _quota_exceeded(response: "requests.models.Response") -> bool:  # type: igno
 
 
 def download_file_from_google_drive(file_id: str, root: str, filename: Optional[str] = None, md5: Optional[str] = None):
-    """Download a Google Drive file and place it in root.
+    """Download a Google Drive file from  and place it in root.
 
     Args:
         file_id (str): id of file to be downloaded

From 9d3cac794344a38bcd681a27c3d8507e75b6d9da Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 19:00:04 -0500
Subject: [PATCH 29/44] rename var constants, use file context manager, verify
 str args

---
 torchvision/datasets/widerface.py | 117 ++++++++++++++----------------
 1 file changed, 56 insertions(+), 61 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 68309a4901e..d03cf2f98be 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -3,7 +3,8 @@
 from os.path import abspath, expanduser
 import torch
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
-from .utils import download_file_from_google_drive, check_integrity, download_url, extract_archive
+from .utils import download_file_from_google_drive, check_integrity, download_url, \
+    extract_archive, verify_str_arg
 from .vision import VisionDataset
 
 
@@ -46,14 +47,14 @@ class WIDERFace(VisionDataset):
             downloaded again.
     """
 
-    base_folder = "widerface"
-    file_list = [
+    BASE_FOLDER = "widerface"
+    FILE_LIST = [
         # File ID                        MD5 Hash                            Filename
         ("0B6eKvaijfFUDQUUwd21EckhUbWs", "3fedf70df600953d25982bcd13d91ba2", "WIDER_train.zip"),
         ("0B6eKvaijfFUDd3dIRmpvSk8tLUk", "dfa7d7e790efa35df3788964cf0bbaea", "WIDER_val.zip"),
         ("0B6eKvaijfFUDbW4tdGpaYjgzZkU", "e5d8f4248ed24c334bbd12f49c29dd40", "WIDER_test.zip")
     ]
-    annotations_file = ("http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip",
+    ANNOTATIONS_FILE = ("http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip",
                         "0e3767bcf0e326556d407bf5bff5d27c",
                         "wider_face_split.zip")
 
@@ -66,23 +67,21 @@ def __init__(
             target_transform: Optional[Callable] = None,
             download: bool = False,
     ) -> None:
-        super(WIDERFace, self).__init__(root=os.path.join(root, self.base_folder),
+        super(WIDERFace, self).__init__(root=os.path.join(root, self.BASE_FOLDER),
                                         transform=transform,
                                         target_transform=target_transform)
         # check arguments
-        if split not in ("train", "val", "test"):
-            raise ValueError("split \"{}\" is not recognized.".format(split))
-        self.split = split
-
+        self.split = verify_str_arg(split, "split", ("train", "val", "test"))
         if self.split == "test":
             target_type = ""
+
         if isinstance(target_type, list):
             self.target_type = target_type
         else:
             self.target_type = [target_type]
+        self.target_type = [verify_str_arg(t, "target_type", ("raw", "bbox", "attr", ""))
+                            for t in target_type]
 
-        if not (all(x in ["raw", "bbox", "attr", ""] for x in self.target_type)):
-            raise ValueError("target_type \"{}\" is not recognized.".format(self.target_type))
         if not self.target_type and self.target_transform is not None:
             raise RuntimeError('target_transform is specified but target_type is empty')
 
@@ -127,11 +126,9 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
             elif t == "attr":
                 # attributes are defined after the bbox coordinates
                 target.append(self.img_info[index]["raw"][:, 4:])
-            elif t == "":
+            else: # target_type == "":
                 target = None
                 break
-            else:
-                raise ValueError("Target type \"{}\" is not recognized.".format(t))
         if target:
             target = tuple(target) if len(target) > 1 else target[0]
             if self.target_transform is not None:
@@ -150,57 +147,55 @@ def parse_train_val_annotations_file(self) -> None:
         filename = "wider_face_train_bbx_gt.txt" if self.split == "train" else "wider_face_val_bbx_gt.txt"
         filepath = os.path.join(self.root, "wider_face_split", filename)
 
-        f = open(filepath, "r")
-        lines = f.readlines()
-
-        file_name_line, num_boxes_line, box_annotation_line = True, False, False
-        num_boxes, box_counter = 0, 0
-        labels = []
-        for line in lines:
-            line = line.rstrip()
-            if file_name_line:
-                img_path = os.path.join(self.root, "WIDER_" + self.split, "images", line)
-                img_path = abspath(expanduser(img_path))
-                file_name_line = False
-                num_boxes_line = True
-            elif num_boxes_line:
-                num_boxes = int(line)
-                num_boxes_line = False
-                box_annotation_line = True
-            elif box_annotation_line:
-                box_counter += 1
-                line_split = line.split(" ")
-                line_values = [int(x) for x in line_split]
-                labels.append(line_values)
-                if box_counter >= num_boxes:
-                    box_annotation_line = False
-                    file_name_line = True
-                    self.img_info.append({
-                        "img_path": img_path,
-                        "raw": torch.tensor(labels),
-                    })
-                    box_counter = 0
-                    labels.clear()
-            else:
-                raise RuntimeError("Error parsing annotation file {}".format(filepath))
-        f.close()
+        with open(filepath, "r") as f:
+            lines = f.readlines()
+
+            file_name_line, num_boxes_line, box_annotation_line = True, False, False
+            num_boxes, box_counter = 0, 0
+            labels = []
+            for line in lines:
+                line = line.rstrip()
+                if file_name_line:
+                    img_path = os.path.join(self.root, "WIDER_" + self.split, "images", line)
+                    img_path = abspath(expanduser(img_path))
+                    file_name_line = False
+                    num_boxes_line = True
+                elif num_boxes_line:
+                    num_boxes = int(line)
+                    num_boxes_line = False
+                    box_annotation_line = True
+                elif box_annotation_line:
+                    box_counter += 1
+                    line_split = line.split(" ")
+                    line_values = [int(x) for x in line_split]
+                    labels.append(line_values)
+                    if box_counter >= num_boxes:
+                        box_annotation_line = False
+                        file_name_line = True
+                        self.img_info.append({
+                            "img_path": img_path,
+                            "raw": torch.tensor(labels),
+                        })
+                        box_counter = 0
+                        labels.clear()
+                else:
+                    raise RuntimeError("Error parsing annotation file {}".format(filepath))
 
     def parse_test_annotations_file(self) -> None:
         filepath = os.path.join(self.root, "wider_face_split", "wider_face_test_filelist.txt")
         filepath = abspath(expanduser(filepath))
-        f = open(filepath, "r")
-        lines = f.readlines()
-        for line in lines:
-            line = line.rstrip()
-            img_path = os.path.join(self.root, "WIDER_test", "images", line)
-            img_path = abspath(expanduser(img_path))
-            self.img_info.append({"img_path": img_path})
-        f.close()
+        with open(filepath, "r") as f:
+            lines = f.readlines()
+            for line in lines:
+                line = line.rstrip()
+                img_path = os.path.join(self.root, "WIDER_test", "images", line)
+                img_path = abspath(expanduser(img_path))
+                self.img_info.append({"img_path": img_path})
 
     def _check_integrity(self) -> bool:
         # Allow original archive to be deleted (zip). Only need the extracted images
-        all_files = self.file_list.copy()
-        all_files.append(self.annotations_file)
+        all_files = self.FILE_LIST.copy()
+        all_files.append(self.ANNOTATIONS_FILE)
         for (_, md5, filename) in all_files:
             file, ext = os.path.splitext(filename)
             extracted_dir = os.path.join(self.root, file)
@@ -218,7 +213,7 @@ def download(self) -> None:
             return
 
         # download data if the extracted data doesn't exist
-        for (file_id, md5, filename) in self.file_list:
+        for (file_id, md5, filename) in self.FILE_LIST:
             file, _ = os.path.splitext(filename)
             extracted_dir = os.path.join(self.root, file)
             if os.path.isdir(extracted_dir):
@@ -226,6 +221,6 @@ def download(self) -> None:
             download_file_from_google_drive(file_id, self.root, filename, md5)
 
         # download annotation files
-        extracted_dir, _ = os.path.splitext(self.annotations_file[2])
+        extracted_dir, _ = os.path.splitext(self.ANNOTATIONS_FILE[2])
         if not os.path.isdir(extracted_dir):
-            download_url(url=self.annotations_file[0], root=self.root, md5=self.annotations_file[1])
+            download_url(url=self.ANNOTATIONS_FILE[0], root=self.root, md5=self.ANNOTATIONS_FILE[1])

From fb846a2078d60fcc5b4ce14ccea8e2bc3b47bffb Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 19:06:09 -0500
Subject: [PATCH 30/44] fix flake8 error

---
 torchvision/datasets/widerface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index d03cf2f98be..0ba96baad66 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -126,7 +126,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
             elif t == "attr":
                 # attributes are defined after the bbox coordinates
                 target.append(self.img_info[index]["raw"][:, 4:])
-            else: # target_type == "":
+            else:  # target_type == "":
                 target = None
                 break
         if target:

From c11858fefa3ebb619508aba3ad72a4aafe06ce0f Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 19:22:40 -0500
Subject: [PATCH 31/44] fix checking target_type argument values

---
 torchvision/datasets/widerface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 0ba96baad66..59c2f3e748c 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -80,7 +80,7 @@ def __init__(
         else:
             self.target_type = [target_type]
         self.target_type = [verify_str_arg(t, "target_type", ("raw", "bbox", "attr", ""))
-                            for t in target_type]
+                            for t in self.target_type]
 
         if not self.target_type and self.target_transform is not None:
             raise RuntimeError('target_transform is specified but target_type is empty')

From 2e456805dc1117aec1653ab44624a209cee08a90 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 20:08:08 -0500
Subject: [PATCH 32/44] create uncompressed dataset folders

---
 test/fakedata_generation.py | 59 +++++++++++++++----------------------
 1 file changed, 24 insertions(+), 35 deletions(-)

diff --git a/test/fakedata_generation.py b/test/fakedata_generation.py
index e5e245a5950..b81a3c17bfd 100644
--- a/test/fakedata_generation.py
+++ b/test/fakedata_generation.py
@@ -174,45 +174,36 @@ def _make_devkit_archive(root):
 @contextlib.contextmanager
 def widerface_root():
     """
+    Generates a dataset with the following folder structure and returns the path root:
     <root>
-    └── widerface
-        ├── wider_face_split.zip ('wider_face_split' when uncompressed)
-        ├── WIDER_train.zip ('WIDER_train' when uncompressed)
-        ├── WIDER_val.zip ('WIDER_val' when uncompressed)
-        └── WIDER_test.zip ('WIDER_test' when uncompressed)
+        └── widerface
+            ├── wider_face_split
+            ├── WIDER_train
+            ├── WIDER_val
+            └── WIDER_test
 
     The dataset consist of
-      1 image for each dataset split (train, val, test)
-      annotation files for each split
+      1 image for each dataset split (train, val, test) and annotation files
+      for each split
     """
-    import shutil
 
     def _make_image(file):
         PIL.Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8)).save(file)
 
     def _make_train_archive(root):
-        with get_tmp_dir() as tmp:
-            extracted_dir = os.path.join(tmp, 'WIDER_train', 'images', '0--Parade')
-            os.makedirs(extracted_dir)
-            _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_1.jpg'))
-            zipped_file = os.path.join(root, 'WIDER_train')
-            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
+        extracted_dir = os.path.join(root, 'WIDER_train', 'images', '0--Parade')
+        os.makedirs(extracted_dir)
+        _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_1.jpg'))
 
     def _make_val_archive(root):
-        with get_tmp_dir() as tmp:
-            extracted_dir = os.path.join(tmp, 'WIDER_val', 'images', '0--Parade')
-            os.makedirs(extracted_dir)
-            _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_2.jpg'))
-            zipped_file = os.path.join(root, 'WIDER_val')
-            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
+        extracted_dir = os.path.join(root, 'WIDER_val', 'images', '0--Parade')
+        os.makedirs(extracted_dir)
+        _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_2.jpg'))
 
     def _make_test_archive(root):
-        with get_tmp_dir() as tmp:
-            extracted_dir = os.path.join(tmp, 'WIDER_test', 'images', '0--Parade')
-            os.makedirs(extracted_dir)
-            _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_3.jpg'))
-            zipped_file = os.path.join(root, 'WIDER_test')
-            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
+        extracted_dir = os.path.join(root, 'WIDER_test', 'images', '0--Parade')
+        os.makedirs(extracted_dir)
+        _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_3.jpg'))
 
     def _make_annotations_archive(root):
         train_bbox_contents = '0--Parade/0_Parade_marchingband_1_1.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n'
@@ -220,7 +211,7 @@ def _make_annotations_archive(root):
         test_filelist_contents = '0--Parade/0_Parade_marchingband_1_3.jpg\n'
 
         with get_tmp_dir() as tmp:
-            extracted_dir = os.path.join(tmp, 'wider_face_split')
+            extracted_dir = os.path.join(root, 'wider_face_split')
             os.makedirs(extracted_dir)
 
             # bbox training file
@@ -238,15 +229,13 @@ def _make_annotations_archive(root):
             with open(filelist_file, "w") as txt_file:
                 txt_file.write(test_filelist_contents)
 
-            # zip up all annotation files
-            zipped_file = os.path.join(root, 'wider_face_split')
-            shutil.make_archive(zipped_file, 'zip', root_dir=tmp)
-
     with get_tmp_dir() as root:
-        _make_train_archive(root)
-        _make_val_archive(root)
-        _make_test_archive(root)
-        _make_annotations_archive(root)
+        root_base = os.path.join(root, "widerface")
+        os.mkdir(root_base)
+        _make_train_archive(root_base)
+        _make_val_archive(root_base)
+        _make_test_archive(root_base)
+        _make_annotations_archive(root_base)
 
         yield root
 

From c8f3f37755c6866fc57ec2540b7f2d5ec6b410ea Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 20:44:06 -0500
Subject: [PATCH 33/44] cleanup unit tests for widerface

---
 test/test_datasets.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index b2c0a95a3d5..3996f9dd871 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -139,10 +139,24 @@ def test_imagenet(self, mock_verify):
             dataset = torchvision.datasets.ImageNet(root, split='val')
             self.generic_classification_dataset_test(dataset)
 
-    def test_widerface(self):
+    @mock.patch('torchvision.datasets.WIDERFace._check_integrity')
+    def test_widerface(self, mock_check_integrity):
+        mock_check_integrity.return_value = True
         with widerface_root() as root:
             dataset = torchvision.datasets.WIDERFace(root, split='train')
             self.assertEqual(len(dataset), 1)
+            img, target = dataset[0]
+            self.assertTrue(isinstance(img, PIL.Image.Image))
+
+            dataset = torchvision.datasets.WIDERFace(root, split='val')
+            self.assertEqual(len(dataset), 1)
+            img, target = dataset[0]
+            self.assertTrue(isinstance(img, PIL.Image.Image))
+
+            dataset = torchvision.datasets.WIDERFace(root, split='test')
+            self.assertEqual(len(dataset), 1)
+            img, target = dataset[0]
+            self.assertTrue(isinstance(img, PIL.Image.Image))
 
     @mock.patch('torchvision.datasets.cifar.check_integrity')
     @mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity')

From ea09dabbb9f508a295a00a1ca49b4c6fee6e89bc Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 20:49:01 -0500
Subject: [PATCH 34/44] use correct os function

---
 test/fakedata_generation.py | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/test/fakedata_generation.py b/test/fakedata_generation.py
index b81a3c17bfd..7eec95301a0 100644
--- a/test/fakedata_generation.py
+++ b/test/fakedata_generation.py
@@ -209,25 +209,23 @@ def _make_annotations_archive(root):
         train_bbox_contents = '0--Parade/0_Parade_marchingband_1_1.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n'
         val_bbox_contents = '0--Parade/0_Parade_marchingband_1_2.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n'
         test_filelist_contents = '0--Parade/0_Parade_marchingband_1_3.jpg\n'
-
-        with get_tmp_dir() as tmp:
-            extracted_dir = os.path.join(root, 'wider_face_split')
-            os.makedirs(extracted_dir)
-
-            # bbox training file
-            bbox_file = os.path.join(extracted_dir, "wider_face_train_bbx_gt.txt")
-            with open(bbox_file, "w") as txt_file:
-                txt_file.write(train_bbox_contents)
-
-            # bbox validation file
-            bbox_file = os.path.join(extracted_dir, "wider_face_val_bbx_gt.txt")
-            with open(bbox_file, "w") as txt_file:
-                txt_file.write(val_bbox_contents)
-
-            # test filelist file
-            filelist_file = os.path.join(extracted_dir, "wider_face_test_filelist.txt")
-            with open(filelist_file, "w") as txt_file:
-                txt_file.write(test_filelist_contents)
+        extracted_dir = os.path.join(root, 'wider_face_split')
+        os.mkdir(extracted_dir)
+
+        # bbox training file
+        bbox_file = os.path.join(extracted_dir, "wider_face_train_bbx_gt.txt")
+        with open(bbox_file, "w") as txt_file:
+            txt_file.write(train_bbox_contents)
+
+        # bbox validation file
+        bbox_file = os.path.join(extracted_dir, "wider_face_val_bbx_gt.txt")
+        with open(bbox_file, "w") as txt_file:
+            txt_file.write(val_bbox_contents)
+
+        # test filelist file
+        filelist_file = os.path.join(extracted_dir, "wider_face_test_filelist.txt")
+        with open(filelist_file, "w") as txt_file:
+            txt_file.write(test_filelist_contents)
 
     with get_tmp_dir() as root:
         root_base = os.path.join(root, "widerface")

From 1f0223c2fe4a726c40f40c98aa9de6713c08c05f Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Mon, 9 Nov 2020 20:55:17 -0500
Subject: [PATCH 35/44] add more info to docstring

---
 torchvision/datasets/widerface.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 59c2f3e748c..e35ea2bb696 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -23,10 +23,10 @@ class WIDERFace(VisionDataset):
             Expects the following folder structure if download=False:
                 <root>
                     └── widerface
-                        ├── wider_face_split.zip
-                        ├── WIDER_train.zip
-                        ├── WIDER_val.zip
-                        └── WIDER_test.zip
+                        ├── wider_face_split.zip ('wider_face_split' if uncompressed)
+                        ├── WIDER_train.zip ('WIDER_train' if uncompressed)
+                        ├── WIDER_val.zip ('WIDER_val' if uncompressed)
+                        └── WIDER_test.zip ('WIDER_test' if uncompressed)
         split (string): The dataset split to use. One of {``train``, ``val``, ``test``}.
             Defaults to ``train``.
         target_type (string): The type of target to use, can be one

From 2813d4ea383a5095dc872e4a266c1436ea58578a Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Wed, 11 Nov 2020 14:39:17 -0500
Subject: [PATCH 36/44] disable unittests for windows

---
 test/test_datasets.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 3996f9dd871..7796496cf1e 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -140,6 +140,7 @@ def test_imagenet(self, mock_verify):
             self.generic_classification_dataset_test(dataset)
 
     @mock.patch('torchvision.datasets.WIDERFace._check_integrity')
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_widerface(self, mock_check_integrity):
         mock_check_integrity.return_value = True
         with widerface_root() as root:

From f5981ede1826c4a1846c2b751003a4c62dbd7ccc Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Wed, 11 Nov 2020 18:23:06 -0500
Subject: [PATCH 37/44] fix _check_integrity logic

---
 torchvision/datasets/widerface.py | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index e35ea2bb696..115604c6bb5 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -3,8 +3,8 @@
 from os.path import abspath, expanduser
 import torch
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
-from .utils import download_file_from_google_drive, check_integrity, download_url, \
-    extract_archive, verify_str_arg
+from .utils import check_integrity, download_file_from_google_drive, \
+    download_and_extract_archive, extract_archive, verify_str_arg
 from .vision import VisionDataset
 
 
@@ -90,7 +90,7 @@ def __init__(
 
         if not self._check_integrity():
             raise RuntimeError("Dataset not found or corrupted. " +
-                               "You can use download=True to download it")
+                               "You can use download=True to download and prepare it")
 
         # process dataset
         # dataset will be stored as a list of dict objects (img_info)
@@ -199,12 +199,8 @@ def _check_integrity(self) -> bool:
         for (_, md5, filename) in all_files:
             file, ext = os.path.splitext(filename)
             extracted_dir = os.path.join(self.root, file)
-            if os.path.exists(extracted_dir):
-                continue
-            filepath = os.path.join(self.root, filename)
-            if not check_integrity(filepath, md5):
+            if not os.path.exists(extracted_dir):
                 return False
-            extract_archive(filepath)
         return True
 
     def download(self) -> None:
@@ -212,15 +208,13 @@ def download(self) -> None:
             print('Files already downloaded and verified')
             return
 
-        # download data if the extracted data doesn't exist
+        # download and extract image data
         for (file_id, md5, filename) in self.FILE_LIST:
-            file, _ = os.path.splitext(filename)
-            extracted_dir = os.path.join(self.root, file)
-            if os.path.isdir(extracted_dir):
-                continue
             download_file_from_google_drive(file_id, self.root, filename, md5)
+            filepath = os.path.join(self.root, filename)
+            extract_archive(filepath)
 
-        # download annotation files
-        extracted_dir, _ = os.path.splitext(self.ANNOTATIONS_FILE[2])
-        if not os.path.isdir(extracted_dir):
-            download_url(url=self.ANNOTATIONS_FILE[0], root=self.root, md5=self.ANNOTATIONS_FILE[1])
+        # download and extract annotation files
+        download_and_extract_archive(url=self.ANNOTATIONS_FILE[0],
+                                     download_root=self.root,
+                                     md5=self.ANNOTATIONS_FILE[1])

From a4d305171ad7b540b538d13d797c68c6aeee082f Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Wed, 11 Nov 2020 22:01:36 -0500
Subject: [PATCH 38/44] update docstring

---
 torchvision/datasets/widerface.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 115604c6bb5..90e0e265121 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -23,10 +23,10 @@ class WIDERFace(VisionDataset):
             Expects the following folder structure if download=False:
                 <root>
                     └── widerface
-                        ├── wider_face_split.zip ('wider_face_split' if uncompressed)
-                        ├── WIDER_train.zip ('WIDER_train' if uncompressed)
-                        ├── WIDER_val.zip ('WIDER_val' if uncompressed)
-                        └── WIDER_test.zip ('WIDER_test' if uncompressed)
+                        ├── wider_face_split ('wider_face_split.zip' if compressed)
+                        ├── WIDER_train ('WIDER_train.zip' if compressed)
+                        ├── WIDER_val ('WIDER_val.zip' if compressed)
+                        └── WIDER_test ('WIDER_test.zip' if compressed)
         split (string): The dataset split to use. One of {``train``, ``val``, ``test``}.
             Defaults to ``train``.
         target_type (string): The type of target to use, can be one

From 2dcd8c85f8beece53a450233b3bae0557c9225a1 Mon Sep 17 00:00:00 2001
From: Joshua Bradley <jgbradley1@gmail.com>
Date: Sat, 2 Jan 2021 23:53:00 -0500
Subject: [PATCH 39/44] remove citation

---
 torchvision/datasets/widerface.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 90e0e265121..fc425830f9c 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -11,13 +11,6 @@
 class WIDERFace(VisionDataset):
     """`WIDERFace <http://shuoyang1213.me/WIDERFACE/>`_ Dataset.
 
-    Citation:
-    @inproceedings{yang2016wider,
-        author    = "Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou",
-        booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)",
-        title     = "WIDER FACE: A Face Detection Benchmark",
-        year      = "2016"}
-
     Args:
         root (string): Root directory where images and annotations are downloaded to.
             Expects the following folder structure if download=False:

From 95d6708aec971b84ff029363c0dbafa39f4f7413 Mon Sep 17 00:00:00 2001
From: Josh <jgbradley1@gmail.com>
Date: Sun, 3 Jan 2021 23:08:47 -0500
Subject: [PATCH 40/44] remove target_type option

---
 torchvision/datasets/widerface.py | 56 +++++++------------------------
 1 file changed, 13 insertions(+), 43 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index fc425830f9c..a11c832fd54 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -2,7 +2,7 @@
 import os
 from os.path import abspath, expanduser
 import torch
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Tuple
 from .utils import check_integrity, download_file_from_google_drive, \
     download_and_extract_archive, extract_archive, verify_str_arg
 from .vision import VisionDataset
@@ -22,15 +22,6 @@ class WIDERFace(VisionDataset):
                         └── WIDER_test ('WIDER_test.zip' if compressed)
         split (string): The dataset split to use. One of {``train``, ``val``, ``test``}.
             Defaults to ``train``.
-        target_type (string): The type of target to use, can be one
-            of {``raw``, ``bbox``, ``attr``.``""``}. Can also be a list to
-            output a tuple with all specified target types.
-            The targets represent:
-                ``raw``  (torch.tensor shape=(10,) dtype=int): all annotations combined (bbox + attr)
-                ``bbox`` (torch.tensor shape=(4,) dtype=int): bounding box (x, y, width, height)
-                ``attr`` (torch.tensor shape=(6,) dtype=int): label values for attributes
-                    that represent (blur, expression, illumination, occlusion, pose, invalid)
-            Defaults to ``raw``. If empty, ``None`` will be returned as target.
         transform (callable, optional): A function/transform that  takes in a PIL image
             and returns a transformed version. E.g, ``transforms.RandomCrop``
         target_transform (callable, optional): A function/transform that takes in the
@@ -55,7 +46,6 @@ def __init__(
             self,
             root: str,
             split: str = "train",
-            target_type: Union[List[str], str] = "raw",
             transform: Optional[Callable] = None,
             target_transform: Optional[Callable] = None,
             download: bool = False,
@@ -65,18 +55,6 @@ def __init__(
                                         target_transform=target_transform)
         # check arguments
         self.split = verify_str_arg(split, "split", ("train", "val", "test"))
-        if self.split == "test":
-            target_type = ""
-
-        if isinstance(target_type, list):
-            self.target_type = target_type
-        else:
-            self.target_type = [target_type]
-        self.target_type = [verify_str_arg(t, "target_type", ("raw", "bbox", "attr", ""))
-                            for t in self.target_type]
-
-        if not self.target_type and self.target_transform is not None:
-            raise RuntimeError('target_transform is specified but target_type is empty')
 
         if download:
             self.download()
@@ -108,24 +86,9 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
         if self.transform is not None:
             img = self.transform(img)
 
-        # prepare target
-        target: Any = []
-        for t in self.target_type:
-            if t == "raw":
-                target.append(self.img_info[index][t])
-            elif t == "bbox":
-                # bbox coordinates are the first 4 values in the raw annotation
-                target.append(self.img_info[index]["raw"][:, :4])
-            elif t == "attr":
-                # attributes are defined after the bbox coordinates
-                target.append(self.img_info[index]["raw"][:, 4:])
-            else:  # target_type == "":
-                target = None
-                break
-        if target:
-            target = tuple(target) if len(target) > 1 else target[0]
-            if self.target_transform is not None:
-                target = self.target_transform(target)
+        target = None if self.split == "test" else self.img_info[index]["annotations"]
+        if self.target_transform is not None:
+            target = self.target_transform(target)
 
         return img, target
 
@@ -133,7 +96,7 @@ def __len__(self) -> int:
         return len(self.img_info)
 
     def extra_repr(self) -> str:
-        lines = ["Target type: {target_type}", "Split: {split}"]
+        lines = ["Split: {split}"]
         return '\n'.join(lines).format(**self.__dict__)
 
     def parse_train_val_annotations_file(self) -> None:
@@ -165,9 +128,16 @@ def parse_train_val_annotations_file(self) -> None:
                     if box_counter >= num_boxes:
                         box_annotation_line = False
                         file_name_line = True
+                        labels_tensor = torch.tensor(labels)
                         self.img_info.append({
                             "img_path": img_path,
-                            "raw": torch.tensor(labels),
+                            "annotations": {"bbox": labels_tensor[:, 0:4],
+                                            "blur": labels_tensor[:, 4],
+                                            "expression": labels_tensor[:, 5],
+                                            "illumination": labels_tensor[:, 6],
+                                            "occlusion": labels_tensor[:, 7],
+                                            "pose": labels_tensor[:, 8],
+                                            "invalid": labels_tensor[:, 9]}
                         })
                         box_counter = 0
                         labels.clear()

From 00448e9df584af89b1daf727419625d7f1d53eeb Mon Sep 17 00:00:00 2001
From: Josh Bradley <jgbradley1@gmail.com>
Date: Mon, 4 Jan 2021 18:07:36 -0500
Subject: [PATCH 41/44] fix formatting issue

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/widerface.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index a11c832fd54..9593033e1d5 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -38,9 +38,11 @@ class WIDERFace(VisionDataset):
         ("0B6eKvaijfFUDd3dIRmpvSk8tLUk", "dfa7d7e790efa35df3788964cf0bbaea", "WIDER_val.zip"),
         ("0B6eKvaijfFUDbW4tdGpaYjgzZkU", "e5d8f4248ed24c334bbd12f49c29dd40", "WIDER_test.zip")
     ]
-    ANNOTATIONS_FILE = ("http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip",
-                        "0e3767bcf0e326556d407bf5bff5d27c",
-                        "wider_face_split.zip")
+    ANNOTATIONS_FILE = (
+        "http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip",
+        "0e3767bcf0e326556d407bf5bff5d27c",
+        "wider_face_split.zip"
+    )
 
     def __init__(
             self,

From 752ed0dd2c45f5dd1d66bacb3fb9ee05dcb744c1 Mon Sep 17 00:00:00 2001
From: Josh <jgbradley1@gmail.com>
Date: Mon, 4 Jan 2021 18:28:16 -0500
Subject: [PATCH 42/44] remove comment and add more info to docstring

---
 torchvision/datasets/widerface.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 9593033e1d5..3a0db5e0774 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -65,8 +65,6 @@ def __init__(
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download and prepare it")
 
-        # process dataset
-        # dataset will be stored as a list of dict objects (img_info)
         self.img_info: Any = []
         if self.split in ("train", "val"):
             self.parse_train_val_annotations_file()
@@ -79,7 +77,8 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
             index (int): Index
 
         Returns:
-            tuple: (image, target) where target=None for the test split.
+            tuple: (image, target) where target is a dict of annotations for all faces in the image.
+            target=None for the test split.
         """
 
         # stay consistent with other datasets and return a PIL Image

From 31b0122faf31b16ece784a924fe20a86a08ae806 Mon Sep 17 00:00:00 2001
From: Josh <jgbradley1@gmail.com>
Date: Wed, 6 Jan 2021 20:29:32 -0500
Subject: [PATCH 43/44] update type annotations

---
 torchvision/datasets/widerface.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index 3a0db5e0774..c4fe6229f0b 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -2,7 +2,7 @@
 import os
 from os.path import abspath, expanduser
 import torch
-from typing import Any, Callable, Optional, Tuple
+from typing import Any, Callable, List, Dict, Optional, Tuple, Union
 from .utils import check_integrity, download_file_from_google_drive, \
     download_and_extract_archive, extract_archive, verify_str_arg
 from .vision import VisionDataset
@@ -65,7 +65,7 @@ def __init__(
             raise RuntimeError("Dataset not found or corrupted. " +
                                "You can use download=True to download and prepare it")
 
-        self.img_info: Any = []
+        self.img_info: List[Dict[str, Union[str, Dict[str, torch.Tensor]]]] = []
         if self.split in ("train", "val"):
             self.parse_train_val_annotations_file()
         else:
@@ -132,7 +132,7 @@ def parse_train_val_annotations_file(self) -> None:
                         labels_tensor = torch.tensor(labels)
                         self.img_info.append({
                             "img_path": img_path,
-                            "annotations": {"bbox": labels_tensor[:, 0:4],
+                            "annotations": {"bbox": labels_tensor[:, 0:4],  # x, y, width, height
                                             "blur": labels_tensor[:, 4],
                                             "expression": labels_tensor[:, 5],
                                             "illumination": labels_tensor[:, 6],

From 02ae27ca99a91874f757932f9329ed320e36632c Mon Sep 17 00:00:00 2001
From: Josh <jgbradley1@gmail.com>
Date: Wed, 6 Jan 2021 20:50:57 -0500
Subject: [PATCH 44/44] restart CI jobs

---
 torchvision/datasets/widerface.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py
index c4fe6229f0b..5e826183f57 100644
--- a/torchvision/datasets/widerface.py
+++ b/torchvision/datasets/widerface.py
@@ -106,7 +106,6 @@ def parse_train_val_annotations_file(self) -> None:
 
         with open(filepath, "r") as f:
             lines = f.readlines()
-
             file_name_line, num_boxes_line, box_annotation_line = True, False, False
             num_boxes, box_counter = 0, 0
             labels = []