Save estimated maps and compute metrics at the original image size

Former-commit-id: 02de9dfdc3dc0758ddfc3eeebe8b36b437058568
javiribera · Apr 2, 2018 · 55dffe4 · 55dffe4
1 parent 2ed82ff
commit 55dffe4
Show file tree

Hide file tree

Showing 3 changed files with 68 additions and 22 deletions.
diff --git a/object-locator/data.py b/object-locator/data.py
@@ -354,7 +354,8 @@ def __init__(self,
                         img_abspath = os.path.join(self.root_dir, filename)
                         orig_width, orig_height = \
                             get_image_size.get_image_size(img_abspath)
-                        self.dict[filename] = {'count': count,
+                        self.dict[filename] = {'filename': filename,
+                                               'count': count,
                                                'locations': locations,
                                                'orig_width': orig_width,
                                                'orig_height': orig_height}

diff --git a/object-locator/locate.py b/object-locator/locate.py
@@ -22,8 +22,10 @@
 import torchvision as tv
 from torchvision.models import inception_v3
 from sklearn import mixture
-from .data import CSVDataset
+import skimage.transform
+from .data import XMLDataset
 from .data import csv_collator
+from .data import ScaleImageAndLabel
 
 from . import losses
 from . import argparser
@@ -41,17 +43,19 @@
     torch.cuda.manual_seed_all(args.seed)
 
 # Create output directories
-os.makedirs(os.path.join(args.out_dir, 'painted'), exist_ok=True)
 os.makedirs(os.path.join(args.out_dir, 'est_map'), exist_ok=True)
 os.makedirs(os.path.join(args.out_dir, 'est_map_thresholded'), exist_ok=True)
+if args.paint:
+    os.makedirs(os.path.join(args.out_dir, 'painted'), exist_ok=True)
 
 # Tensor type to use, select CUDA or not
 tensortype = torch.cuda.FloatTensor if args.cuda else torch.FloatTensor
 tensortype_cpu = torch.FloatTensor
 
 # Data loading code
-testset = CSVDataset(args.dataset,
+testset = XMLDataset(args.dataset,
                      transforms=transforms.Compose([
+                         ScaleImageAndLabel(size=(args.height, args.width)),
                          transforms.ToTensor(),
                          transforms.Normalize((0.5, 0.5, 0.5),
                                               (0.5, 0.5, 0.5)),
@@ -63,6 +67,9 @@
                                  num_workers=args.nThreads,
                                  collate_fn=csv_collator)
 
+# Array with the [height, width] that input images are rescaled to
+resized_size = np.array([args.height, args.width])
+
 # Loss function
 l1_loss = nn.L1Loss(reduce=False)
 mse_loss = nn.MSELoss(reduce=False)
@@ -144,24 +151,37 @@
         target_locations = [dictt['locations'] for dictt in dictionaries]
         target_count = torch.stack([dictt['count']
                                     for dictt in dictionaries])
-
         # Prepare targets
         target_locations = [Variable(t.type(tensortype), volatile=True)
                             for t in target_locations]
         target_count = Variable(target_count.type(tensortype), volatile=True)
 
+    # Original (pre-resize) height and width of each image in the batch
+    target_orig_heights = [dictt['orig_height'] for dictt in dictionaries]
+    target_orig_widths = [dictt['orig_width'] for dictt in dictionaries]
+    target_orig_heights = tensortype(target_orig_heights)
+    target_orig_widths = tensortype(target_orig_widths)
+    target_orig_sizes = torch.stack(
+        (target_orig_heights, target_orig_widths)).transpose(0, 1)
+    origsize = (dictionaries[0]['orig_height'],
+                dictionaries[0]['orig_width'])
+
     # Feed forward
     est_map, est_count = model.forward(imgs)
 
     # Save estimated map to disk
-    tv.utils.save_image(est_map.data[0, :, :],
-                        os.path.join(args.out_dir,
-                                     'est_map',
-                                     dictionaries[0]['filename']))
+    est_map_numpy = est_map.data[0, :, :].cpu().numpy()
+    est_map_numpy_origsize = \
+        skimage.transform.resize(est_map_numpy,
+                                 output_shape=origsize,
+                                 mode='constant')
+    cv2.imwrite(os.path.join(args.out_dir,
+                             'est_map',
+                             dictionaries[0]['filename']),
+                est_map_numpy_origsize)
 
     # The estimated map must be thresholded to obtain estimated points
-    est_map_numpy = est_map.data[0, :, :].cpu().numpy()
-    mask = cv2.inRange(est_map_numpy, 2 / 255, 1)
+    mask = cv2.inRange(est_map_numpy_origsize, 2 / 255, 1)
     coord = np.where(mask > 0)
     y = coord[0].reshape((-1, 1))
     x = coord[1].reshape((-1, 1))
@@ -188,11 +208,12 @@
     # Paint red dots if user asked for it
     if args.paint:
         # Paint a circle in the original image at the estimated location
-        image_with_x = tensortype(imgs.data[0, :, :].squeeze().size()).\
-            copy_(imgs.data[0, :, :].squeeze())
+        image_with_x = np.moveaxis(imgs.data[0].cpu().numpy(), 0, 2).copy()
+        image_with_x = \
+            skimage.transform.resize(image_with_x,
+                                     output_shape=origsize,
+                                     mode='constant')
         image_with_x = ((image_with_x + 1) / 2.0 * 255.0)
-        image_with_x = image_with_x.cpu().numpy()
-        image_with_x = np.moveaxis(image_with_x, 0, 2).copy()
         for y, x in centroids:
             image_with_x = cv2.circle(image_with_x, (x, y), 3, [255, 0, 0], -1)
         # Save original image with circle to disk
@@ -223,14 +244,18 @@
         # Evaluation using the Averaged Hausdorff Distance
         target_locations = \
             target_locations[0].data.cpu().numpy().reshape(-1, 2)
+        norm_factor = target_orig_sizes[0].unsqueeze(0).cpu().numpy() \
+            / resized_size
+        norm_factor = norm_factor.repeat(len(target_locations), axis=0)
+        target_locations_wrt_orig = norm_factor*target_locations
         ahd = losses.averaged_hausdorff_distance(centroids,
-                                                 target_locations)
+                                                 target_locations_wrt_orig)
 
         sum_ahd += ahd
 
         # Validation using Precision and Recall
         for judge in judges:
-            judge.evaluate_sample(centroids, target_locations)
+            judge.evaluate_sample(centroids, target_locations_wrt_orig)
 
     df = pd.DataFrame(data=[est_count.data[0, 0]],
                       index=[dictionaries[0]['filename']],

diff --git a/object-locator/train.py b/object-locator/train.py
@@ -19,6 +19,7 @@
 from torchvision import transforms
 from torch.utils.data import DataLoader
 from sklearn import mixture
+import skimage.transform
 
 from . import losses
 from .models import unet_model
@@ -199,9 +200,18 @@
                                             'Term3*%s' % args.lambdaa,
                                             'Sum/3'])
 
-            # Send input and output images (first one in the batch)
-            log.image(imgs=[((imgs[0, :, :].data + 1) / 2.0 * 255.0).squeeze().cpu().numpy(),
-                            est_map[0, :, :].data.unsqueeze(0).cpu().numpy()],
+            # Send input and output images (first one in the batch).
+            # Resize to original size
+            orig_shape = target_orig_sizes[0].data.cpu().numpy().tolist()
+            orig_img_origsize = ((skimage.transform.resize(imgs[0].data.squeeze().cpu().numpy().transpose((1, 2, 0)),
+                                                           output_shape=orig_shape,
+                                                           mode='constant') + 1) / 2.0 * 255.0).\
+                astype(np.float32).transpose((2, 0, 1))
+            est_map_origsize = skimage.transform.resize(est_map[0].data.unsqueeze(0).cpu().numpy().transpose((1, 2, 0)),
+                                                        output_shape=orig_shape,
+                                                        mode='constant').\
+                astype(np.float32).transpose((2, 0, 1))
+            log.image(imgs=[orig_img_origsize, est_map_origsize],
                       titles=['(Training) Input',
                               '(Training) U-Net output'],
                       windows=[1, 2])
@@ -335,8 +345,18 @@
         if time.time() > tic_val + args.log_interval:
             tic_val = time.time()
 
-            log.image(imgs=[((imgs.data[0, :, :] + 1) / 2.0 * 255.0).squeeze().cpu().numpy(),
-                            est_map[0, :, :].data.unsqueeze(0).cpu().numpy()],
+            # Send input and output images (first one in the batch).
+            # Resize to original size
+            orig_shape = target_orig_sizes[0].data.cpu().numpy().tolist()
+            orig_img_origsize = ((skimage.transform.resize(imgs[0].data.squeeze().cpu().numpy().transpose((1, 2, 0)),
+                                                           output_shape=orig_shape,
+                                                           mode='constant') + 1) / 2.0 * 255.0).\
+                astype(np.float32).transpose((2, 0, 1))
+            est_map_origsize = skimage.transform.resize(est_map[0].data.unsqueeze(0).cpu().numpy().transpose((1, 2, 0)),
+                                                        output_shape=orig_shape,
+                                                        mode='constant').\
+                astype(np.float32).transpose((2, 0, 1))
+            log.image(imgs=[orig_img_origsize, est_map_origsize],
                       titles=['(Validation) Input',
                               '(Validation) U-Net output'],
                       windows=[5, 6])