diff --git a/object-locator/data.py b/object-locator/data.py index 861266b..ed6b265 100644 --- a/object-locator/data.py +++ b/object-locator/data.py @@ -354,7 +354,8 @@ def __init__(self, img_abspath = os.path.join(self.root_dir, filename) orig_width, orig_height = \ get_image_size.get_image_size(img_abspath) - self.dict[filename] = {'count': count, + self.dict[filename] = {'filename': filename, + 'count': count, 'locations': locations, 'orig_width': orig_width, 'orig_height': orig_height} diff --git a/object-locator/locate.py b/object-locator/locate.py index 14c7ebc..8f60908 100644 --- a/object-locator/locate.py +++ b/object-locator/locate.py @@ -22,8 +22,10 @@ import torchvision as tv from torchvision.models import inception_v3 from sklearn import mixture -from .data import CSVDataset +import skimage.transform +from .data import XMLDataset from .data import csv_collator +from .data import ScaleImageAndLabel from . import losses from . import argparser @@ -41,17 +43,19 @@ torch.cuda.manual_seed_all(args.seed) # Create output directories -os.makedirs(os.path.join(args.out_dir, 'painted'), exist_ok=True) os.makedirs(os.path.join(args.out_dir, 'est_map'), exist_ok=True) os.makedirs(os.path.join(args.out_dir, 'est_map_thresholded'), exist_ok=True) +if args.paint: + os.makedirs(os.path.join(args.out_dir, 'painted'), exist_ok=True) # Tensor type to use, select CUDA or not tensortype = torch.cuda.FloatTensor if args.cuda else torch.FloatTensor tensortype_cpu = torch.FloatTensor # Data loading code -testset = CSVDataset(args.dataset, +testset = XMLDataset(args.dataset, transforms=transforms.Compose([ + ScaleImageAndLabel(size=(args.height, args.width)), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), @@ -63,6 +67,9 @@ num_workers=args.nThreads, collate_fn=csv_collator) +# Array with [height, width] of the new size +resized_size = np.array([args.height, args.width]) + # Loss function l1_loss = nn.L1Loss(reduce=False) mse_loss = nn.MSELoss(reduce=False) @@ -144,24 +151,37 @@ target_locations = [dictt['locations'] for dictt in dictionaries] target_count = torch.stack([dictt['count'] for dictt in dictionaries]) - # Prepare targets target_locations = [Variable(t.type(tensortype), volatile=True) for t in target_locations] target_count = Variable(target_count.type(tensortype), volatile=True) + # Original size + target_orig_heights = [dictt['orig_height'] for dictt in dictionaries] + target_orig_widths = [dictt['orig_width'] for dictt in dictionaries] + target_orig_heights = tensortype(target_orig_heights) + target_orig_widths = tensortype(target_orig_widths) + target_orig_sizes = torch.stack( + (target_orig_heights, target_orig_widths)).transpose(0, 1) + origsize = (dictionaries[0]['orig_height'], + dictionaries[0]['orig_width']) + # Feed forward est_map, est_count = model.forward(imgs) # Save estimated map to disk - tv.utils.save_image(est_map.data[0, :, :], - os.path.join(args.out_dir, - 'est_map', - dictionaries[0]['filename'])) + est_map_numpy = est_map.data[0, :, :].cpu().numpy() + est_map_numpy_origsize = \ + skimage.transform.resize(est_map_numpy, + output_shape=origsize, + mode='constant') + cv2.imwrite(os.path.join(args.out_dir, + 'est_map', + dictionaries[0]['filename']), + est_map_numpy_origsize) # The estimated map must be thresholded to obtain estimated points - est_map_numpy = est_map.data[0, :, :].cpu().numpy() - mask = cv2.inRange(est_map_numpy, 2 / 255, 1) + mask = cv2.inRange(est_map_numpy_origsize, 2 / 255, 1) coord = np.where(mask > 0) y = coord[0].reshape((-1, 1)) x = coord[1].reshape((-1, 1)) @@ -188,11 +208,12 @@ # Paint red dots if user asked for it if args.paint: # Paint a circle in the original image at the estimated location - image_with_x = tensortype(imgs.data[0, :, :].squeeze().size()).\ - copy_(imgs.data[0, :, :].squeeze()) + image_with_x = np.moveaxis(imgs.data[0].cpu().numpy(), 0, 2).copy() + image_with_x = \ + skimage.transform.resize(image_with_x, + output_shape=origsize, + mode='constant') image_with_x = ((image_with_x + 1) / 2.0 * 255.0) - image_with_x = image_with_x.cpu().numpy() - image_with_x = np.moveaxis(image_with_x, 0, 2).copy() for y, x in centroids: image_with_x = cv2.circle(image_with_x, (x, y), 3, [255, 0, 0], -1) # Save original image with circle to disk @@ -223,14 +244,18 @@ # Evaluation using the Averaged Hausdorff Distance target_locations = \ target_locations[0].data.cpu().numpy().reshape(-1, 2) + norm_factor = target_orig_sizes[0].unsqueeze(0).cpu().numpy() \ + / resized_size + norm_factor = norm_factor.repeat(len(target_locations), axis=0) + target_locations_wrt_orig = norm_factor*target_locations ahd = losses.averaged_hausdorff_distance(centroids, - target_locations) + target_locations_wrt_orig) sum_ahd += ahd # Validation using Precision and Recall for judge in judges: - judge.evaluate_sample(centroids, target_locations) + judge.evaluate_sample(centroids, target_locations_wrt_orig) df = pd.DataFrame(data=[est_count.data[0, 0]], index=[dictionaries[0]['filename']], diff --git a/object-locator/train.py b/object-locator/train.py index 6367135..82e700c 100644 --- a/object-locator/train.py +++ b/object-locator/train.py @@ -19,6 +19,7 @@ from torchvision import transforms from torch.utils.data import DataLoader from sklearn import mixture +import skimage.transform from . import losses from .models import unet_model @@ -199,9 +200,18 @@ 'Term3*%s' % args.lambdaa, 'Sum/3']) - # Send input and output images (first one in the batch) - log.image(imgs=[((imgs[0, :, :].data + 1) / 2.0 * 255.0).squeeze().cpu().numpy(), - est_map[0, :, :].data.unsqueeze(0).cpu().numpy()], + # Send input and output images (first one in the batch). + # Resize to original size + orig_shape = target_orig_sizes[0].data.cpu().numpy().tolist() + orig_img_origsize = ((skimage.transform.resize(imgs[0].data.squeeze().cpu().numpy().transpose((1, 2, 0)), + output_shape=orig_shape, + mode='constant') + 1) / 2.0 * 255.0).\ + astype(np.float32).transpose((2, 0, 1)) + est_map_origsize = skimage.transform.resize(est_map[0].data.unsqueeze(0).cpu().numpy().transpose((1, 2, 0)), + output_shape=orig_shape, + mode='constant').\ + astype(np.float32).transpose((2, 0, 1)) + log.image(imgs=[orig_img_origsize, est_map_origsize], titles=['(Training) Input', '(Training) U-Net output'], windows=[1, 2]) @@ -335,8 +345,18 @@ if time.time() > tic_val + args.log_interval: tic_val = time.time() - log.image(imgs=[((imgs.data[0, :, :] + 1) / 2.0 * 255.0).squeeze().cpu().numpy(), - est_map[0, :, :].data.unsqueeze(0).cpu().numpy()], + # Send input and output images (first one in the batch). + # Resize to original size + orig_shape = target_orig_sizes[0].data.cpu().numpy().tolist() + orig_img_origsize = ((skimage.transform.resize(imgs[0].data.squeeze().cpu().numpy().transpose((1, 2, 0)), + output_shape=orig_shape, + mode='constant') + 1) / 2.0 * 255.0).\ + astype(np.float32).transpose((2, 0, 1)) + est_map_origsize = skimage.transform.resize(est_map[0].data.unsqueeze(0).cpu().numpy().transpose((1, 2, 0)), + output_shape=orig_shape, + mode='constant').\ + astype(np.float32).transpose((2, 0, 1)) + log.image(imgs=[orig_img_origsize, est_map_origsize], titles=['(Validation) Input', '(Validation) U-Net output'], windows=[5, 6])