diff --git a/create_json_custom.py b/create_json_custom.py new file mode 100644 index 00000000..374cea27 --- /dev/null +++ b/create_json_custom.py @@ -0,0 +1,32 @@ +import json +import glob +import os + +def get_image_list_from_folder(img_folder): + """ + Obtain list of image path from the custom folder + """ + img_pattern = os.path.join(img_folder, '*.jpg') + img_list = [img_path for img_path in glob.glob(img_pattern)] + return img_list + +def write_to_json(img_list, output_json): + """ + Save the image list to a JSON file + """ + with open(output_json, 'w') as json_file: + json.dump(img_list, json_file) + +if __name__ == '__main__': + # Path to the folder that contains images + IMG_FOLDER = '/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/test_data/images' + + # Path to the final JSON file + OUTPUT_JSON = '/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/test_data/test.json' + + image_list = get_image_list_from_folder(IMG_FOLDER) + for img in image_list: + print(img) + + write_to_json(image_list, OUTPUT_JSON) + \ No newline at end of file diff --git a/dataset.py b/dataset.py index 21670ccc..708b82dc 100644 --- a/dataset.py +++ b/dataset.py @@ -1,18 +1,25 @@ import os -import random import torch +import random import numpy as np -from torch.utils.data import Dataset from PIL import Image -from image import * +from image import load_data import torchvision.transforms.functional as F +from torch.utils.data import Dataset -class listDataset(Dataset): - def __init__(self, root, shape=None, shuffle=True, transform=None, train=False, seen=0, batch_size=1, num_workers=4): + +class ListDataset(Dataset): + """ + custom dataset class for loading images + """ + def __init__(self, root, shape=None, shuffle=True, transform=None, + train=False, seen=0, batch_size=1, num_workers=4): + """ + root: list of images + """ if train: - root = root *4 + root = root * 4 random.shuffle(root) - self.nSamples = len(root) self.lines = root self.transform = transform @@ -21,26 +28,28 @@ def __init__(self, root, shape=None, shuffle=True, transform=None, train=False, self.seen = seen self.batch_size = batch_size self.num_workers = num_workers - - + def __len__(self): + """ + # of samples. + """ return self.nSamples + def __getitem__(self, index): - assert index <= len(self), 'index range error' - + """ + return tuple of image and target + """ + if index >= len(self): + raise IndexError("Index out of range") + img_path = self.lines[index] - - img,target = load_data(img_path,self.train) - - #img = 255.0 * F.to_tensor(img) - - #img[0,:,:]=img[0,:,:]-92.8207477031 - #img[1,:,:]=img[1,:,:]-95.2757037428 - #img[2,:,:]=img[2,:,:]-104.877445883 + img, target = load_data(img_path, self.train) + # img = 255.0 * F.to_tensor(img) + # img[0, :, :] = img[0, :, :] - 92.8207477031 + # img[1, :, :] = img[1, :, :] - 95.2757037428 + # img[2, :, :] = img[2, :, :] - 104.877445883 - - - if self.transform is not None: + if self.transform: img = self.transform(img) - return img,target \ No newline at end of file + return img, target diff --git a/image.py b/image.py index c8506d49..abdc88c5 100644 --- a/image.py +++ b/image.py @@ -1,43 +1,38 @@ import random -import os -from PIL import Image,ImageFilter,ImageDraw -import numpy as np import h5py -from PIL import ImageStat +import numpy as np import cv2 +from PIL import Image -def load_data(img_path,train = True): - gt_path = img_path.replace('.jpg','.h5').replace('images','ground_truth') - img = Image.open(img_path).convert('RGB') - gt_file = h5py.File(gt_path) - target = np.asarray(gt_file['density']) +def load_data(img_path, train=True): + """ + loads the image and corresponding density map and returns the tuple (image & density map) + """ + gt_path = img_path.replace('.jpg', '.h5').replace('images', 'ground_truth') + + img = Image.open(img_path).convert('RGB') # load image and ground truth + with h5py.File(gt_path, 'r') as gt_file: + target = np.asarray(gt_file['density']) + # allows for image augmentation if False: - crop_size = (img.size[0]/2,img.size[1]/2) - if random.randint(0,9)<= -1: - - - dx = int(random.randint(0,1)*img.size[0]*1./2) - dy = int(random.randint(0,1)*img.size[1]*1./2) + crop_size = (img.size[0] // 2, img.size[1] // 2) + + if random.randint(0, 9) <= -1: + dx = random.randint(0, 1) * img.size[0] // 2 + dy = random.randint(0, 1) * img.size[1] // 2 else: - dx = int(random.random()*img.size[0]*1./2) - dy = int(random.random()*img.size[1]*1./2) - - - - img = img.crop((dx,dy,crop_size[0]+dx,crop_size[1]+dy)) - target = target[dy:crop_size[1]+dy,dx:crop_size[0]+dx] - - - - - if random.random()>0.8: + dx = int(random.random() * img.size[0] * 0.5) + dy = int(random.random() * img.size[1] * 0.5) + + img = img.crop((dx, dy, crop_size[0] + dx, crop_size[1] + dy)) + target = target[dy:crop_size[1]+dy, dx:crop_size[0]+dx] + + # horizontal flip + if random.random() > 0.8: target = np.fliplr(target) img = img.transpose(Image.FLIP_LEFT_RIGHT) - - - - - target = cv2.resize(target,(target.shape[1]/8,target.shape[0]/8),interpolation = cv2.INTER_CUBIC)*64 - - - return img,target \ No newline at end of file + + # Resizing the density map + target = cv2.resize(target, (target.shape[1] // 8, target.shape[0] // 8), interpolation=cv2.INTER_CUBIC) * 64 + + return img, target diff --git a/make_dataset.py b/make_dataset.py new file mode 100644 index 00000000..668bfbb6 --- /dev/null +++ b/make_dataset.py @@ -0,0 +1,77 @@ +import os +import glob +import h5py +import scipy.io as io +import cv2 +import matplotlib.pyplot as plt +from image import * +from scipy.ndimage.filters import gaussian_filter + + +# Set the root to the Shanghai dataset you download +ROOT = '/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/ShanghaiTech' + +def get_img_paths(root, train_path, test_path): + """ + return the image paths + """ + part_a_train = os.path.join(root, train_path, 'images') + part_a_test = os.path.join(root, test_path, 'images') + path_sets = [part_a_train, part_a_test] + + img_paths = [] + for path in path_sets: + for img_path in glob.glob(os.path.join(path, '*.jpg')): + img_paths.append(img_path) + return img_paths + + +def process_images(img_paths): + """ + Resize and generate the ground truth + """ + for img_path in img_paths: + print(img_path) + image = cv2.imread(img_path) + + # Resize image dimensions + d_width, d_height = 640, 360 # setup the image dimension + re_image = cv2.resize(image, (d_width, d_height), cv2.INTER_AREA) + cv2.imwrite(img_path, re_image) + + # Load corresponding mat file + mat = io.loadmat(img_path.replace('.jpg', '.mat').replace('images', 'ground_truth').replace('IMG_', 'GT_IMG_').replace('DSC_', 'GT_DSC_').replace('20221212_', 'GT_20221212_')) + img = plt.imread(img_path) + + # Prepare empty density map + k = np.zeros((img.shape[0], img.shape[1])) + gt = mat["image_info"][0, 0][0, 0][0] + + # calculate the scaling factor for x and y dim + scale_x = d_width / image.shape[1] + scale_y = d_height / image.shape[0] + for i in range(len(gt)): + if int(gt[i][1]) < img.shape[0] and int(gt[i][0]) < img.shape[1]: + # scaling to new image dimension + gt[i][1] = scale_x * gt[i][1] + gt[i][0] = scale_y * gt[i][0] + k[int(gt[i][1]), int(gt[i][0])] = 1 # mark with 1 to indicate presence at the location + + k = gaussian_filter(k, 15) + with h5py.File(img_path.replace('.jpg', '.h5').replace('images', 'ground_truth'), 'w') as hf: # Save as .h5 + hf['density'] = k + +def train_test_path(root_path, train_data_folder, test_data_folder): + train_images = [os.path.join(root_path, train_data_folder, img) for img in os.listdir(os.path.join(root_path, train_data_folder)) if img.endswith('.jpg')] + test_images = [os.path.join(root_path, test_data_folder, img) for img in os.listdir(os.path.join(root_path, test_data_folder)) if img.endswith('.jpg')] + + return train_images + test_images +def obtain_images(root, train_data_folder, test_data_folder): + image_paths = train_test_path(root, train_data_folder, test_data_folder) + process_images(image_paths) + +if __name__ == '__main__': + ROOT = '/Users/kshitiz/Documents/GitHub/CSRNet-pytorch/ShanghaiTech' + TRAIN_DATA_FOLDER, TEST_DATA_FOLDER = 'train_data', 'test_data' + obtain_images(ROOT, TRAIN_DATA_FOLDER, TEST_DATA_FOLDER) + \ No newline at end of file diff --git a/model.py b/model.py index 9fafc562..ca773f73 100644 --- a/model.py +++ b/model.py @@ -1,28 +1,31 @@ +import collections import torch.nn as nn -import torch from torchvision import models -from utils import save_net,load_net class CSRNet(nn.Module): def __init__(self, load_weights=False): super(CSRNet, self).__init__() self.seen = 0 self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512] - self.backend_feat = [512, 512, 512,256,128,64] + self.backend_feat = [512, 512, 512, 256, 128, 64] + self.frontend = make_layers(self.frontend_feat) - self.backend = make_layers(self.backend_feat,in_channels = 512,dilation = True) + self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True) self.output_layer = nn.Conv2d(64, 1, kernel_size=1) + if not load_weights: - mod = models.vgg16(pretrained = True) - self._initialize_weights() - for i in xrange(len(self.frontend.state_dict().items())): - self.frontend.state_dict().items()[i][1].data[:] = mod.state_dict().items()[i][1].data[:] - def forward(self,x): + self._initialize_weights_from_vgg() + + def forward(self, x): x = self.frontend(x) x = self.backend(x) x = self.output_layer(x) return x + def _initialize_weights(self): + """ + Initialize the weights of the model + """ for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.normal_(m.weight, std=0.01) @@ -31,22 +34,39 @@ def _initialize_weights(self): elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) - - -def make_layers(cfg, in_channels = 3,batch_norm=False,dilation = False): - if dilation: - d_rate = 2 - else: - d_rate = 1 + + def _initialize_weights_from_vgg(self): + """ + Initialize weights using pretrained VGG16 model + """ + mod = models.vgg16(pretrained=True) + self._initialize_weights() + + fsd = collections.OrderedDict() + frontend_dict_items = list(self.frontend.state_dict().items()) + mod_dict_items = list(mod.state_dict().items()) + for i in range(len(frontend_dict_items)): + fsd[frontend_dict_items[i][0]] = mod_dict_items[i][1] + + self.frontend.load_state_dict(fsd) + + +def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False): + """ + Construct a layer for the given configuration + """ + d_rate = 2 if dilation else 1 layers = [] + for v in cfg: if v == 'M': layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate,dilation = d_rate) + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate) if batch_norm: layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] else: layers += [conv2d, nn.ReLU(inplace=True)] in_channels = v - return nn.Sequential(*layers) \ No newline at end of file + + return nn.Sequential(*layers) diff --git a/train.py b/train.py index a8a93ea0..98ce2b43 100644 --- a/train.py +++ b/train.py @@ -1,230 +1,160 @@ -import sys import os - -import warnings - +import time +import json +import argparse +import torch +import torch.nn as nn +from torchvision import transforms from model import CSRNet - from utils import save_checkpoint +import dataset -import torch -import torch.nn as nn -from torch.autograd import Variable -from torchvision import datasets, transforms +def parse_args(): + parser = argparse.ArgumentParser(description='PyTorch CSRNet') + parser.add_argument('train_json', help='path to train json') + parser.add_argument('test_json', help='path to test json') + parser.add_argument('--pre', default=None, type=str, help='path to the pretrained model') + parser.add_argument('gpu', help='GPU id to use.') + parser.add_argument('task', help='task id to use.') + return parser.parse_args() -import numpy as np -import argparse -import json -import cv2 -import dataset -import time -parser = argparse.ArgumentParser(description='PyTorch CSRNet') +class AverageMeter: + """ + Computes and stores the average and current value + """ + + def __init__(self): + self.reset() -parser.add_argument('train_json', metavar='TRAIN', - help='path to train json') -parser.add_argument('test_json', metavar='TEST', - help='path to test json') + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 -parser.add_argument('--pre', '-p', metavar='PRETRAINED', default=None,type=str, - help='path to the pretrained model') + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + +def adjust_learning_rate(optimizer, epoch, steps, scales, original_lr): + """ + Sets the learning rate to the initial LR decayed by 10 every 30 epochs + """ + lr = original_lr + for step, scale in zip(steps, scales): + if epoch >= step: + lr *= scale + else: + break -parser.add_argument('gpu',metavar='GPU', type=str, - help='GPU id to use.') + for param_group in optimizer.param_groups: + param_group['lr'] = lr + return lr -parser.add_argument('task',metavar='TASK', type=str, - help='task id to use.') def main(): - - global args,best_prec1 - + args = parse_args() best_prec1 = 1e6 - - args = parser.parse_args() args.original_lr = 1e-7 args.lr = 1e-7 - args.batch_size = 1 - args.momentum = 0.95 - args.decay = 5*1e-4 - args.start_epoch = 0 - args.epochs = 400 - args.steps = [-1,1,100,150] - args.scales = [1,1,1,1] - args.workers = 4 + args.batch_size = 8 + args.momentum = 0.95 + args.decay = 5e-4 + args.start_epoch = 0 + args.epochs = 100 + args.steps = [-1, 1, 100, 150] + args.scales = [1, 1, 1, 1] + args.workers = 1 args.seed = time.time() args.print_freq = 30 - with open(args.train_json, 'r') as outfile: + + with open(args.train_json, 'r') as outfile: train_list = json.load(outfile) - with open(args.test_json, 'r') as outfile: + with open(args.test_json, 'r') as outfile: val_list = json.load(outfile) - + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu torch.cuda.manual_seed(args.seed) - - model = CSRNet() - - model = model.cuda() - - criterion = nn.MSELoss(size_average=False).cuda() - - optimizer = torch.optim.SGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.decay) - - if args.pre: - if os.path.isfile(args.pre): - print("=> loading checkpoint '{}'".format(args.pre)) - checkpoint = torch.load(args.pre) - args.start_epoch = checkpoint['epoch'] - best_prec1 = checkpoint['best_prec1'] - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.pre, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.pre)) - + + model = CSRNet().cuda() + criterion = nn.MSELoss(reduction='sum').cuda() + optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.decay) + + if args.pre and os.path.isfile(args.pre): + print(f"=> loading checkpoint '{args.pre}'") + checkpoint = torch.load(args.pre) + args.start_epoch = checkpoint['epoch'] + best_prec1 = checkpoint['best_prec1'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + print(f"=> loaded checkpoint '{args.pre}' (epoch {checkpoint['epoch']})") + elif args.pre: + print(f"=> no checkpoint found at '{args.pre}'") + for epoch in range(args.start_epoch, args.epochs): - - adjust_learning_rate(optimizer, epoch) - - train(train_list, model, criterion, optimizer, epoch) + args.lr = adjust_learning_rate(optimizer, epoch, args.steps, args.scales, args.original_lr) + train(train_list, model, criterion, optimizer, epoch, args) prec1 = validate(val_list, model, criterion) - + is_best = prec1 < best_prec1 best_prec1 = min(prec1, best_prec1) - print(' * best MAE {mae:.3f} ' - .format(mae=best_prec1)) + print(f' * best MAE {best_prec1:.3f}') save_checkpoint({ 'epoch': epoch + 1, 'arch': args.pre, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, - 'optimizer' : optimizer.state_dict(), - }, is_best,args.task) + 'optimizer': optimizer.state_dict(), + }, is_best, args.task) -def train(train_list, model, criterion, optimizer, epoch): - +def train(train_list, model, criterion, optimizer, epoch, args): + model.train() losses = AverageMeter() batch_time = AverageMeter() data_time = AverageMeter() - - + + transform = transforms.Compose([transforms.ToTensor()]) + train_loader = torch.utils.data.DataLoader( - dataset.listDataset(train_list, - shuffle=True, - transform=transforms.Compose([ - transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]), - ]), - train=True, - seen=model.seen, - batch_size=args.batch_size, - num_workers=args.workers), + dataset.listDataset(train_list, shuffle=True, transform=transform, train=True, seen=model.seen, batch_size=args.batch_size, num_workers=args.workers), batch_size=args.batch_size) - print('epoch %d, processed %d samples, lr %.10f' % (epoch, epoch * len(train_loader.dataset), args.lr)) - - model.train() + + print(f'epoch {epoch}, processed {epoch * len(train_loader.dataset)} samples, lr {args.lr:.10f}') + end = time.time() - - for i,(img, target)in enumerate(train_loader): + for i, (img, target) in enumerate(train_loader): data_time.update(time.time() - end) - - img = img.cuda() - img = Variable(img) + + img, target = img.cuda(), target.float().unsqueeze(0).cuda() + output = model(img) - - - - - target = target.type(torch.FloatTensor).unsqueeze(0).cuda() - target = Variable(target) - - loss = criterion(output, target) - + losses.update(loss.item(), img.size(0)) optimizer.zero_grad() loss.backward() - optimizer.step() - + optimizer.step() + batch_time.update(time.time() - end) end = time.time() - + if i % args.print_freq == 0: - print('Epoch: [{0}][{1}/{2}]\t' - 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' - 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' - 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' - .format( - epoch, i, len(train_loader), batch_time=batch_time, - data_time=data_time, loss=losses)) - + print(f'Epoch: [{epoch}][{i}/{len(train_loader)}]\tTime {batch_time.val:.3f} ({batch_time.avg:.3f})\tData {data_time.val:.3f} ({data_time.avg:.3f})\tLoss {losses.val:.4f} ({losses.avg:.4f})') + def validate(val_list, model, criterion): - print ('begin test') - test_loader = torch.utils.data.DataLoader( - dataset.listDataset(val_list, - shuffle=False, - transform=transforms.Compose([ - transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]), - ]), train=False), - batch_size=args.batch_size) - + print('begin test') model.eval() - - mae = 0 - - for i,(img, target) in enumerate(test_loader): - img = img.cuda() - img = Variable(img) - output = model(img) - - mae += abs(output.data.sum()-target.sum().type(torch.FloatTensor).cuda()) - - mae = mae/len(test_loader) - print(' * MAE {mae:.3f} ' - .format(mae=mae)) - - return mae - -def adjust_learning_rate(optimizer, epoch): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - - - args.lr = args.original_lr - - for i in range(len(args.steps)): - - scale = args.scales[i] if i < len(args.scales) else 1 - - - if epoch >= args.steps[i]: - args.lr = args.lr * scale - if epoch == args.steps[i]: - break - else: - break - for param_group in optimizer.param_groups: - param_group['lr'] = args.lr - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self): - self.reset() + transform = transforms.Compose([transforms.ToTensor()]) + test_loader = torch.utils.data.DataLoader(dataset.listDataset(val_list, shuffle=False, transform=transform, train=False), batch_size=8) + mae = sum(abs(output.data.sum() - target.sum().float().cuda()) for img, target in test_loader) + mae /= len(test_loader) + print(f' * MAE {mae:.3f}') + return mae - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - if __name__ == '__main__': - main() \ No newline at end of file + main() + \ No newline at end of file diff --git a/utils.py b/utils.py index 859ad15e..6c900d06 100644 --- a/utils.py +++ b/utils.py @@ -2,17 +2,24 @@ import torch import shutil + def save_net(fname, net): with h5py.File(fname, 'w') as h5f: for k, v in net.state_dict().items(): h5f.create_dataset(k, data=v.cpu().numpy()) + + def load_net(fname, net): with h5py.File(fname, 'r') as h5f: - for k, v in net.state_dict().items(): - param = torch.from_numpy(np.asarray(h5f[k])) + for k, v in net.state_dict().items(): + param = torch.from_numpy(np.asarray(h5f[k])) v.copy_(param) - -def save_checkpoint(state, is_best,task_id, filename='checkpoint.pth.tar'): - torch.save(state, task_id+filename) + +def save_checkpoint(state, is_best, task_id, filename='checkpoint.pth.tar'): + """ + save the checkpoint and the best model. + """ + filepath = f"{task_id}{filename}" + torch.save(state, filepath) if is_best: - shutil.copyfile(task_id+filename, task_id+'model_best.pth.tar') \ No newline at end of file + shutil.copyfile(filepath, f"{task_id}model_best.pth.tar")