Convert images to PIL once at dataset instantiation instead of on every __getitem__ call

Philip Meier · Philip Meier · commit 3f8449763a69 · 2019-07-04T12:16:28.000+02:00
diff --git a/torchvision/datasets/cifar.py b/torchvision/datasets/cifar.py
@@ -94,6 +94,10 @@ def __init__(self, root, train=True,
         self.data = np.vstack(self.data).reshape(-1, 3, 32, 32)
         self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC
 
+        # doing this so that it is consistent with all other datasets
+        # to return a PIL Image
+        self.data = [Image.fromarray(image) for image in self.data]
+
         self._load_meta()
 
     def _load_meta(self):
@@ -119,10 +123,6 @@ def __getitem__(self, index):
         """
         img, target = self.data[index], self.targets[index]
 
-        # doing this so that it is consistent with all other datasets
-        # to return a PIL Image
-        img = Image.fromarray(img)
-
         if self.transform is not None:
             img = self.transform(img)
 
diff --git a/torchvision/datasets/svhn.py b/torchvision/datasets/svhn.py
@@ -66,20 +66,20 @@ def __init__(self, root, split='train',
         import scipy.io as sio
 
         # reading(loading) mat file as array
-        loaded_mat = sio.loadmat(os.path.join(self.root, self.filename))
+        loaded_mat = sio.loadmat(os.path.join(self.root, self.filename),
+                                 squeeze_me=True)
+        data, targets = loaded_mat['X'], loaded_mat['y']
 
-        self.data = loaded_mat['X']
-        # loading from the .mat file gives an np array of type np.uint8
-        # converting to np.int64, so that we have a LongTensor after
-        # the conversion from the numpy array
-        # the squeeze is needed to obtain a 1D tensor
-        self.labels = loaded_mat['y'].astype(np.int64).squeeze()
+        # doing this so that it is consistent with all other datasets
+        # to return a PIL Image
+        self.data = [Image.fromarray(image.squeeze(3))
+                     for image in np.split(data, len(targets), axis=3)]
 
         # the svhn dataset assigns the class label "10" to the digit 0
         # this makes it inconsistent with several loss functions
         # which expect the class labels to be in the range [0, C-1]
-        np.place(self.labels, self.labels == 10, 0)
-        self.data = np.transpose(self.data, (3, 2, 0, 1))
+        np.place(targets, targets == 10, 0)
+        self.targets = [int(target) for target in targets]
 
     def __getitem__(self, index):
         """
@@ -89,11 +89,7 @@ def __getitem__(self, index):
         Returns:
             tuple: (image, target) where target is index of the target class.
         """
-        img, target = self.data[index], int(self.labels[index])
-
-        # doing this so that it is consistent with all other datasets
-        # to return a PIL Image
-        img = Image.fromarray(np.transpose(img, (1, 2, 0)))
+        img, target = self.data[index], self.targets[index]
 
         if self.transform is not None:
             img = self.transform(img)