leeyeehoo · kshitiz-1225 · Oct 19, 2023
diff --git a/create_json_custom.py b/create_json_custom.py
@@ -0,0 +1,32 @@
+import json
+import glob
+import os
+
+def get_image_list_from_folder(img_folder):
+    """
+    Return the paths of all ``*.jpg`` files directly inside ``img_folder``.
+
+    img_folder: directory to scan (sub-folders are not searched).
+    Returns a sorted list of path strings, each starting with ``img_folder``;
+    the list is empty when nothing matches.
+    """
+    img_pattern = os.path.join(img_folder, '*.jpg')
+    # glob returns matches in arbitrary order; sorting keeps the JSON file
+    # built from this list identical from one run to the next.
+    return sorted(glob.glob(img_pattern))
+
+def write_to_json(img_list, output_json):
+    """
+    Serialise ``img_list`` as JSON into the file ``output_json``.
+    The file is opened in 'w' mode, so existing content is replaced.
+    """
+    with open(output_json, 'w') as out_file:
+        out_file.write(json.dumps(img_list))
+
+if __name__ == '__main__':
+    # Local import: only the command-line entry point needs argparse.
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description='Write the list of .jpg images in a folder to a JSON file.')
+    # The defaults are the locations that used to be hard-coded, so running
+    # the script without arguments behaves as before.
+    parser.add_argument(
+        '--img-folder',
+        default='/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/test_data/images',
+        help='path to the folder that contains images')
+    parser.add_argument(
+        '--output-json',
+        default='/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/test_data/test.json',
+        help='path to the final JSON file')
+    args = parser.parse_args()
+
+    image_list = get_image_list_from_folder(args.img_folder)
+    for img in image_list:
+        print(img)
+
+    write_to_json(image_list, args.output_json)
+
diff --git a/dataset.py b/dataset.py
@@ -1,18 +1,25 @@
 import os
-import random
 import torch
+import random
 import numpy as np
-from torch.utils.data import Dataset
 from PIL import Image
-from image import *
+from image import load_data
 import torchvision.transforms.functional as F
+from torch.utils.data import Dataset
 
-class listDataset(Dataset):
-    def __init__(self, root, shape=None, shuffle=True, transform=None,  train=False, seen=0, batch_size=1, num_workers=4):
+
+class ListDataset(Dataset):
+    """
+    custom dataset class for loading images
+    """
+    def __init__(self, root, shape=None, shuffle=True, transform=None,
+                 train=False, seen=0, batch_size=1, num_workers=4):
+        """
+        root: list of images
+        """
         if train:
-            root = root *4
+            root = root * 4
         random.shuffle(root)
-
         self.nSamples = len(root)
         self.lines = root
         self.transform = transform
@@ -21,26 +28,28 @@ def __init__(self, root, shape=None, shuffle=True, transform=None,  train=False,
         self.seen = seen
         self.batch_size = batch_size
         self.num_workers = num_workers
-
-
+
     def __len__(self):
+        """
+        Return the number of samples (self.nSamples, set in __init__).
+        """
         return self.nSamples
+
     def __getitem__(self, index):
-        assert index <= len(self), 'index range error' 
-
+        """
+        Return the tuple (img, target) for the sample at ``index``.
+
+        The path self.lines[index] is passed to load_data together with
+        self.train; self.transform is applied to the image when it is truthy.
+        Raises IndexError when index >= len(self).
+        """
+        if index >= len(self):
+            raise IndexError("Index out of range")
+
         img_path = self.lines[index]
-
-        img,target = load_data(img_path,self.train)
-
-        #img = 255.0 * F.to_tensor(img)
-
-        #img[0,:,:]=img[0,:,:]-92.8207477031
-        #img[1,:,:]=img[1,:,:]-95.2757037428
-        #img[2,:,:]=img[2,:,:]-104.877445883
+        img, target = load_data(img_path, self.train)
 
+        # img = 255.0 * F.to_tensor(img)
+        # img[0, :, :] = img[0, :, :] - 92.8207477031
+        # img[1, :, :] = img[1, :, :] - 95.2757037428
+        # img[2, :, :] = img[2, :, :] - 104.877445883
 
-
-
-        if self.transform is not None:
+        if self.transform:
             img = self.transform(img)
-        return img,target
+        return img, target
diff --git a/image.py b/image.py
@@ -1,43 +1,38 @@
 import random
-import os
-from PIL import Image,ImageFilter,ImageDraw
-import numpy as np
 import h5py
-from PIL import ImageStat
+import numpy as np
 import cv2
+from PIL import Image
 
-def load_data(img_path,train = True):
-    gt_path = img_path.replace('.jpg','.h5').replace('images','ground_truth')
-    img = Image.open(img_path).convert('RGB')
-    gt_file = h5py.File(gt_path)
-    target = np.asarray(gt_file['density'])
+def load_data(img_path, train=True):
+    """
+    Load an image and its density map and return them as the tuple (img, target).
+
+    img_path: path of the .jpg image; the ground-truth path is derived from it
+        by replacing '.jpg' with '.h5' and 'images' with 'ground_truth'.
+    train: not read by the code below (the augmentation branch is disabled).
+
+    Returns the RGB PIL image and the 'density' array resized to 1/8 of its
+    height and width (integer division) and multiplied by 64.
+    """
+    gt_path = img_path.replace('.jpg', '.h5').replace('images', 'ground_truth')
+
+    img = Image.open(img_path).convert('RGB') # load image and ground truth
+    with h5py.File(gt_path, 'r') as gt_file:
+        target = np.asarray(gt_file['density'])
+    # augmentation (random crop + horizontal flip); never runs because of the constant-False guard
     if False:
-        crop_size = (img.size[0]/2,img.size[1]/2)
-        if random.randint(0,9)<= -1:
-
-
-            dx = int(random.randint(0,1)*img.size[0]*1./2)
-            dy = int(random.randint(0,1)*img.size[1]*1./2)
+        crop_size = (img.size[0] // 2, img.size[1] // 2)
+
+        if random.randint(0, 9) <= -1:
+            dx = random.randint(0, 1) * img.size[0] // 2
+            dy = random.randint(0, 1) * img.size[1] // 2
         else:
-            dx = int(random.random()*img.size[0]*1./2)
-            dy = int(random.random()*img.size[1]*1./2)
-
-
-
-        img = img.crop((dx,dy,crop_size[0]+dx,crop_size[1]+dy))
-        target = target[dy:crop_size[1]+dy,dx:crop_size[0]+dx]
-
-
-
-
-        if random.random()>0.8:
+            dx = int(random.random() * img.size[0] * 0.5)
+            dy = int(random.random() * img.size[1] * 0.5)
+
+        img = img.crop((dx, dy, crop_size[0] + dx, crop_size[1] + dy))
+        target = target[dy:crop_size[1]+dy, dx:crop_size[0]+dx]
+
+        # horizontal flip of both the density map and the image
+        if random.random() > 0.8:
             target = np.fliplr(target)
             img = img.transpose(Image.FLIP_LEFT_RIGHT)
-
-
-
-
-    target = cv2.resize(target,(target.shape[1]/8,target.shape[0]/8),interpolation = cv2.INTER_CUBIC)*64
-
-
-    return img,target
+
+    # Downsample the density map by 8 per axis; the factor 64 (= 8 * 8)
+    # presumably keeps its sum roughly unchanged - TODO confirm
+    target = cv2.resize(target, (target.shape[1] // 8, target.shape[0] // 8), interpolation=cv2.INTER_CUBIC) * 64
+
+    return img, target
diff --git a/make_dataset.py b/make_dataset.py
@@ -0,0 +1,77 @@
+import os
+import glob
+import h5py
+import scipy.io as io
+import cv2
+import matplotlib.pyplot as plt
+from image import *
+from scipy.ndimage.filters import gaussian_filter
+
+
+# Set ROOT to the directory of the ShanghaiTech dataset you downloaded
+ROOT = '/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/ShanghaiTech'
+
+def get_img_paths(root, train_path, test_path):
+    """
+    Return the .jpg paths found in the 'images' sub-folder of the train and
+    test folders under ``root`` (train matches first, then test matches).
+    """
+    image_folders = [
+        os.path.join(root, split_folder, 'images')
+        for split_folder in (train_path, test_path)
+    ]
+    return [
+        jpg_path
+        for folder in image_folders
+        for jpg_path in glob.glob(os.path.join(folder, '*.jpg'))
+    ]
+
+
+def process_images(img_paths, d_width=640, d_height=360):
+    """
+    Resize each image in place and write its density map to an .h5 file.
+
+    For every path the image is resized to (d_width, d_height) and written
+    back to the same path. The annotation points are read from the matching
+    .mat file, rescaled to the new size and marked with 1 in an empty map.
+    The map is blurred with gaussian_filter (sigma 15) and stored under the
+    'density' key of an .h5 file whose path is the image path with '.jpg'
+    replaced by '.h5' and 'images' replaced by 'ground_truth'.
+
+    img_paths: iterable of .jpg image paths.
+    d_width, d_height: target image size in pixels.
+    Raises IOError when an image cannot be read.
+
+    NOTE: the image file is overwritten while the .mat annotations are not,
+    so processing the same file a second time computes a scale of 1 against
+    coordinates that still refer to the original resolution.
+    """
+    for img_path in img_paths:
+        print(img_path)
+        image = cv2.imread(img_path)
+        if image is None:
+            # cv2.imread reports failure by returning None, not by raising
+            raise IOError('could not read image: %s' % img_path)
+        orig_height, orig_width = image.shape[:2]
+
+        # Resize image dimensions. interpolation must be given by keyword:
+        # the third positional parameter of cv2.resize is dst.
+        re_image = cv2.resize(image, (d_width, d_height), interpolation=cv2.INTER_AREA)
+        cv2.imwrite(img_path, re_image)
+
+        # Load corresponding mat file
+        mat_path = (img_path.replace('.jpg', '.mat')
+                    .replace('images', 'ground_truth')
+                    .replace('IMG_', 'GT_IMG_')
+                    .replace('DSC_', 'GT_DSC_')
+                    .replace('20221212_', 'GT_20221212_'))
+        mat = io.loadmat(mat_path)
+        # presumably one (x, y) pair per annotated point - TODO confirm
+        gt = mat["image_info"][0, 0][0, 0][0]
+
+        # Prepare empty density map at the resized resolution
+        k = np.zeros((d_height, d_width))
+
+        # scaling factors from original to resized coordinates
+        scale_x = d_width / orig_width
+        scale_y = d_height / orig_height
+        for point in gt:
+            # component 0 selects the map column (x), component 1 the row (y)
+            x, y = point[0], point[1]
+            # validate against the ORIGINAL size: the annotations are still
+            # unscaled here; negative values would wrap around as indices
+            if 0 <= int(x) < orig_width and 0 <= int(y) < orig_height:
+                # clamp so float rounding can never step past the last index
+                col = min(int(scale_x * x), d_width - 1)
+                row = min(int(scale_y * y), d_height - 1)
+                k[row, col] = 1 # mark with 1 to indicate presence at the location
+
+        k = gaussian_filter(k, 15)
+        with h5py.File(img_path.replace('.jpg', '.h5').replace('images', 'ground_truth'), 'w') as hf: # Save as .h5
+            hf['density'] = k
+
+def train_test_path(root_path, train_data_folder, test_data_folder):
+    """
+    Return the paths of the .jpg files located directly inside the train and
+    test folders under ``root_path`` (train entries first, then test entries).
+
+    NOTE(review): unlike get_img_paths, no 'images' sub-folder is appended
+    here - confirm the folder arguments point at the directories that hold
+    the .jpg files.
+    """
+    jpg_paths = []
+    for data_folder in (train_data_folder, test_data_folder):
+        folder = os.path.join(root_path, data_folder)
+        for entry in os.listdir(folder):
+            if entry.endswith('.jpg'):
+                jpg_paths.append(os.path.join(folder, entry))
+
+    return jpg_paths
+def obtain_images(root, train_data_folder, test_data_folder):
+    """
+    Collect the train and test image paths with train_test_path and hand
+    them to process_images.
+    """
+    process_images(train_test_path(root, train_data_folder, test_data_folder))
+
+if __name__ == '__main__':
+    # Dataset location and the two sub-folders whose .jpg files are processed
+    ROOT = '/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/ShanghaiTech'
+    TRAIN_DATA_FOLDER = 'train_data'
+    TEST_DATA_FOLDER = 'test_data'
+    obtain_images(ROOT, TRAIN_DATA_FOLDER, TEST_DATA_FOLDER)
+
diff --git a/model.py b/model.py
@@ -1,28 +1,31 @@
+import collections
 import torch.nn as nn
-import torch
 from torchvision import models
-from utils import save_net,load_net
 
 class CSRNet(nn.Module):
     def __init__(self, load_weights=False):
+        """
+        Build the frontend, the dilated backend and the 1x1 output layer.
+
+        load_weights: when False the weights are initialised through
+        _initialize_weights_from_vgg(); when True no initialisation is
+        performed here.
+        """
         super(CSRNet, self).__init__()
         self.seen = 0
         self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
-        self.backend_feat  = [512, 512, 512,256,128,64]
+        self.backend_feat = [512, 512, 512, 256, 128, 64]
+
         self.frontend = make_layers(self.frontend_feat)
-        self.backend = make_layers(self.backend_feat,in_channels = 512,dilation = True)
+        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
         self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
+
         if not load_weights:
-            mod = models.vgg16(pretrained = True)
-            self._initialize_weights()
-            for i in xrange(len(self.frontend.state_dict().items())):
-                self.frontend.state_dict().items()[i][1].data[:] = mod.state_dict().items()[i][1].data[:]
-    def forward(self,x):
+            self._initialize_weights_from_vgg()
+
+    def forward(self, x):
+        """
+        Pass x through frontend, backend and output_layer, in that order,
+        and return the result.
+        """
         x = self.frontend(x)
         x = self.backend(x)
         x = self.output_layer(x)
         return x
+
     def _initialize_weights(self):
+        """
+        Initialize the weights of the model
+        """
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
                 nn.init.normal_(m.weight, std=0.01)
@@ -31,22 +34,39 @@ def _initialize_weights(self):
             elif isinstance(m, nn.BatchNorm2d):
                 nn.init.constant_(m.weight, 1)
                 nn.init.constant_(m.bias, 0)
-
-
-def make_layers(cfg, in_channels = 3,batch_norm=False,dilation = False):
-    if dilation:
-        d_rate = 2
-    else:
-        d_rate = 1
+
+    def _initialize_weights_from_vgg(self):
+        """
+        Initialize weights using pretrained VGG16 model
+
+        Every layer first receives the initialisation of
+        _initialize_weights(); the frontend entries are then replaced by the
+        leading entries of the pretrained VGG16 state dict. Entries are
+        matched by position, not by key name.
+        """
+        mod = models.vgg16(pretrained=True)
+        self._initialize_weights()
+
+        # zip pairs the two state dicts entry by entry and stops at the end
+        # of the shorter one; the frontend key is kept, the VGG tensor is used
+        fsd = collections.OrderedDict(
+            (frontend_key, vgg_tensor)
+            for (frontend_key, _), (_, vgg_tensor) in zip(
+                self.frontend.state_dict().items(), mod.state_dict().items())
+        )
+
+        self.frontend.load_state_dict(fsd)
+
+
+def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False):
+    """
+    Build an nn.Sequential from the configuration list ``cfg``.
+
+    cfg: list whose entries are either 'M' (adds a 2x2 max-pool with stride 2)
+        or a number v (adds a 3x3 Conv2d with v output channels and a ReLU).
+    in_channels: input channels of the first convolution.
+    batch_norm: when True a BatchNorm2d is inserted between conv and ReLU.
+    dilation: when True the convolutions use dilation 2 and padding 2,
+        otherwise dilation 1 and padding 1.
+    """
+    d_rate = 2 if dilation else 1
     layers = []
+
     for v in cfg:
         if v == 'M':
             layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
         else:
-            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate,dilation = d_rate)
+            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
             if batch_norm:
                 layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
             else:
                 layers += [conv2d, nn.ReLU(inplace=True)]
             in_channels = v
-    return nn.Sequential(*layers)                
+
+    return nn.Sequential(*layers)