Merge pull request #8 from tomrunia/regression
UCF-101 dataset mean and normalization changes
tomrunia authored Nov 14, 2018
2 parents 89f89d4 + d2a9797 commit e6cc440
Showing 11 changed files with 169 additions and 76 deletions.
9 changes: 7 additions & 2 deletions config.py
@@ -14,7 +14,10 @@ def parse_opts():
parser.add_argument('--dataset', type=str, required=True, help='Dataset string (kinetics | activitynet | ucf101 | blender)')
parser.add_argument('--num_val_samples', type=int, default=1, help='Number of validation samples for each activity')
parser.add_argument('--norm_value', default=255, type=int, help='Divide inputs by 255 or 1')
parser.add_argument('--no_dataset_mean', action='store_true', help='Do not use the dataset mean; normalize to zero mean instead')
parser.add_argument('--no_dataset_std', action='store_true', help='Do not use the dataset std; normalize to unit std instead')
parser.add_argument('--num_classes', default=400, type=int, help= 'Number of classes (activitynet: 200, kinetics: 400, ucf101: 101, hmdb51: 51)')
parser.set_defaults(no_dataset_std=True)

# Preprocessing pipeline
parser.add_argument('--spatial_size', default=224, type=int, help='Height and width of inputs')
@@ -38,7 +41,8 @@ def parse_opts():
parser.add_argument('--checkpoint_path', default='', type=str, help='Checkpoint file (.pth) of previous training')
parser.add_argument('--finetune_num_classes', default=36, type=int, help='Number of classes for fine-tuning (num_classes should match the pretraining dataset).')
parser.add_argument('--finetune_prefixes', default='logits,Mixed_5', type=str, help='Prefixes of layers to finetune, comma-separated (used by I3D and ResNet).')
parser.add_argument('--finetune_begin_index', default=0, type=int, help='Begin block index of fine-tuning (not used by I3D).')
parser.add_argument('--finetune_begin_index', default=4, type=int, help='Begin block index of fine-tuning (not used by I3D).')
parser.add_argument('--finetune_restore_optimizer', action='store_true', help='Whether to restore optimizer state')

# Optimization
parser.add_argument('--optimizer', default='adam', type=str, help='Which optimizer to use (SGD | adam | rmsprop)')
@@ -58,12 +62,13 @@ def parse_opts():
parser.add_argument('--checkpoint_frequency', type=int, default=1, help='Save checkpoint after this number of epochs')
parser.add_argument('--checkpoints_num_keep', type=int, default=5, help='Number of checkpoints to keep')
parser.add_argument('--log_frequency', type=int, default=5, help='Logging frequency in number of steps')
parser.add_argument('--log_image_frequency', type=int, default=200, help='Image logging frequency in number of steps')
parser.add_argument('--no_tensorboard', action='store_true', default=False, help='Disable the use of TensorboardX')

# Misc
parser.add_argument('--device', default='cuda:0', help='Device string cpu | cuda:0')
parser.add_argument('--history_steps', default=25, type=int, help='History of running average meters')
parser.add_argument('--num_workers', default=4, type=int, help='Number of threads for multi-thread loading')
parser.add_argument('--num_workers', default=6, type=int, help='Number of threads for multi-thread loading')
parser.add_argument('--no_eval', action='store_true', default=False, help='Disable evaluation')

return parser.parse_args()
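
A note on the normalization flags added above: --no_dataset_mean and --no_dataset_std (the latter now defaulting to True) choose between dataset statistics and plain zero-mean / unit-std scaling. Below is a minimal sketch of how the flags might translate into the (mean, std) pair for a Normalize transform; it is not part of this commit and the statistics shown are placeholder values.

# Sketch (not in this commit): map the two flags to normalization statistics.
def resolve_normalization(no_dataset_mean, no_dataset_std, dataset_mean, dataset_std):
    mean = [0.0, 0.0, 0.0] if no_dataset_mean else dataset_mean
    std = [1.0, 1.0, 1.0] if no_dataset_std else dataset_std
    return mean, std

# With the new default (no_dataset_std=True) only the dataset mean is subtracted.
mean, std = resolve_normalization(False, True, [114.8, 107.3, 99.7], [38.8, 37.7, 40.0])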
15 changes: 6 additions & 9 deletions datasets/ucf101.py
@@ -11,14 +11,15 @@

from utils.utils import load_value_file

##########################################################################################
##########################################################################################

def pil_loader(path):
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
with open(path, 'rb') as f:
with Image.open(f) as img:
return img.convert('RGB')


def accimage_loader(path):
try:
import accimage
@@ -27,15 +28,13 @@ def accimage_loader(path):
# Potentially a decoding problem, fall back to PIL.Image
return pil_loader(path)


def get_default_image_loader():
from torchvision import get_image_backend
if get_image_backend() == 'accimage':
return accimage_loader
else:
return pil_loader


def video_loader(video_dir_path, frame_indices, image_loader):
video = []
for i in frame_indices:
@@ -47,17 +46,14 @@ def video_loader(video_dir_path, frame_indices, image_loader):

return video


def get_default_video_loader():
image_loader = get_default_image_loader()
return functools.partial(video_loader, image_loader=image_loader)


def load_annotation_data(data_file_path):
with open(data_file_path, 'r') as data_file:
return json.load(data_file)


def get_class_labels(data):
class_labels_map = {}
index = 0
@@ -66,7 +62,6 @@ def get_class_labels(data):
index += 1
return class_labels_map


def get_video_names_and_annotations(data, subset):
video_names = []
annotations = []
@@ -80,6 +75,8 @@ def get_video_names_and_annotations(data, subset):

return video_names, annotations

##########################################################################################
##########################################################################################

def make_dataset(root_path, annotation_path, subset, n_samples_for_each_video,
sample_duration):
@@ -143,8 +140,8 @@ def make_dataset(root_path, annotation_path, subset, n_samples_for_each_video,

return dataset, idx_to_class

############################################################################
############################################################################
##########################################################################################
##########################################################################################

class UCF101(data.Dataset):
"""
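
For context, the loader chain above (get_default_image_loader, video_loader, get_default_video_loader) is typically used as in the sketch below. The clip directory and frame indices are illustrative only; the per-frame filename pattern lives in a collapsed part of this diff.

# Illustrative usage (not in this commit): load four frames of one UCF-101 clip.
loader = get_default_video_loader()  # picks accimage if available, else PIL
frames = loader('/data/ucf101/jpg/ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01',
                frame_indices=[1, 2, 3, 4])
print('Loaded {} RGB frames'.format(len(frames)))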
22 changes: 21 additions & 1 deletion epoch_iterators.py
@@ -45,7 +45,7 @@ def train_epoch(config, model, criterion, optimizer, device,
optimizer.zero_grad()

# Move inputs to GPU memory
clips = clips.to(device)
clips = clips.to(device)
targets = targets.to(device)
if config.model == 'i3d':
targets = torch.unsqueeze(targets, -1)
@@ -97,6 +97,16 @@ def train_epoch(config, model, criterion, optimizer, device,
summary_writer.add_scalar('train/learning_rate', current_learning_rate(optimizer), global_step)
summary_writer.add_scalar('train/weight_decay', current_weight_decay(optimizer), global_step)

if summary_writer and step % config.log_image_frequency == 0:
# TensorboardX video summary
for example_idx in range(4):
clip_for_display = clips[example_idx].clone().cpu()
min_val = float(clip_for_display.min())
max_val = float(clip_for_display.max())
clip_for_display.clamp_(min=min_val, max=max_val)
clip_for_display.add_(-min_val).div_(max_val - min_val + 1e-5)
summary_writer.add_video('train_clips/{:04d}'.format(example_idx), clip_for_display.unsqueeze(0), global_step)

# Epoch statistics
epoch_duration = float(time.time() - epoch_start_time)
epoch_avg_loss = np.mean(losses)
@@ -159,6 +169,16 @@ def validation_epoch(config, model, criterion, device, data_loader, epoch, summa
step, steps_in_epoch, examples_per_second,
accuracies[step], losses[step]))

if summary_writer and step == 0:
# TensorboardX video summary
for example_idx in range(4):
clip_for_display = clips[example_idx].clone().cpu()
min_val = float(clip_for_display.min())
max_val = float(clip_for_display.max())
clip_for_display.clamp_(min=min_val, max=max_val)
clip_for_display.add_(-min_val).div_(max_val - min_val + 1e-5)
summary_writer.add_video('validation_clips/{:04d}'.format(example_idx), clip_for_display.unsqueeze(0), epoch*steps_in_epoch)

# Epoch statistics
epoch_duration = float(time.time() - epoch_start_time)
epoch_avg_loss = np.mean(losses)
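
The same min-max rescaling is duplicated in the training and validation loops above (the preceding clamp to the clip's own min and max is effectively a no-op). A small helper along the lines of the sketch below could factor it out; this is not part of the commit, and depending on the tensorboardX version add_video may expect (N, T, C, H, W), so a permute could be needed before the unsqueeze.

# Sketch (not in this commit): rescale a clip tensor to [0, 1] for display.
def normalize_clip_for_display(clip, eps=1e-5):
    clip = clip.clone().cpu()
    min_val, max_val = float(clip.min()), float(clip.max())
    return (clip - min_val) / (max_val - min_val + eps)

# Possible call site inside the logging branch:
#   summary_writer.add_video('train_clips/0000',
#                            normalize_clip_for_display(clips[0]).unsqueeze(0),
#                            global_step)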
4 changes: 2 additions & 2 deletions factory/data_factory.py
@@ -209,8 +209,8 @@ def get_data_loaders(config, train_transforms, validation_transforms=None):
if not config.no_eval and validation_transforms:

dataset_validation = get_validation_set(
config, train_transforms['spatial'],
train_transforms['temporal'], train_transforms['target'])
config, validation_transforms['spatial'],
validation_transforms['temporal'], validation_transforms['target'])

print('Found {} validation examples'.format(len(dataset_validation)))

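
This change builds the validation set from its own transforms instead of the training ones, so training-time augmentation does not leak into evaluation. A minimal sketch of the intended call site, assuming both dictionaries carry the 'spatial', 'temporal' and 'target' keys used above and that the individual transform objects are defined elsewhere:

# Sketch (not in this commit): separate transform dicts for train and validation.
train_transforms = {'spatial': train_spatial, 'temporal': train_temporal, 'target': target_transform}
validation_transforms = {'spatial': val_spatial, 'temporal': val_temporal, 'target': target_transform}
loaders = get_data_loaders(config, train_transforms, validation_transforms)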
29 changes: 8 additions & 21 deletions factory/model_factory.py
@@ -177,7 +177,9 @@ def get_model(config):
print('Moving model to CUDA device...')
# Move model to the GPU
model = model.cuda()
#model = nn.DataParallel(model, device_ids=None)

if config.model != 'i3d':
model = nn.DataParallel(model, device_ids=None)

if config.checkpoint_path:

@@ -194,6 +196,8 @@

# Setup finetuning layer for different number of classes
        # Note: DataParallel wraps the model in a 'module' attribute, which complicates things...
print('Replacing model logits with {} output classes.'.format(config.finetune_num_classes))

if config.model == 'i3d':
model.replace_logits(config.finetune_num_classes)
elif config.model == 'densenet':
@@ -204,29 +208,12 @@
model.module.fc = model.module.fc.cuda()

# Setup which layers to train
finetune_criterion = config.finetune_prefixes if config.model == 'i3d' else config.finetune_begin_index
assert config.model in ('i3d', 'resnet'), 'finetune params not implemented...'
finetune_criterion = config.finetune_prefixes if config.model in ('i3d', 'resnet') else config.finetune_begin_index
parameters_to_train = get_fine_tuning_parameters(model, finetune_criterion)

return model, parameters_to_train
else:

if config.checkpoint_path:

print('Loading pretrained model {}'.format(config.checkpoint_path))
assert os.path.isfile(config.checkpoint_path)

checkpoint = torch.load(config.checkpoint_path)
model.load_state_dict(checkpoint['state_dict'])

if config.model == 'densenet':
model.classifier = nn.Linear(model.classifier.in_features, config.finetune_num_classes)
else:
model.fc = nn.Linear(model.fc.in_features, config.finetune_num_classes)

# Setup which layers to train
finetune_criterion = config.finetune_prefixes if config.model == 'i3d' else config.finetune_begin_index
parameters_to_train = get_fine_tuning_parameters(model, finetune_criterion)

return model, parameters_to_train
raise ValueError('CPU training not supported.')

return model, model.parameters()
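
The parameter groups returned by get_fine_tuning_parameters are what the optimizer should receive so that frozen layers stay untouched. A sketch of that handoff is below; the learning-rate and weight-decay option names are assumptions, not taken from this diff.

# Sketch (not in this commit): hand the selected parameter groups to the optimizer.
import torch.optim as optim

model, parameters_to_train = get_model(config)
optimizer = optim.Adam(parameters_to_train, lr=config.learning_rate,
                       weight_decay=config.weight_decay)  # option names assumed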
9 changes: 9 additions & 0 deletions models/densenet.py
@@ -197,6 +197,7 @@ def get_fine_tuning_parameters(model, ft_begin_index):

assert isinstance(ft_begin_index, int)
if ft_begin_index == 0:
print('WARNING: training full network because --finetune_begin_index=0')
return model.parameters()

ft_module_names = []
@@ -207,12 +208,20 @@
ft_module_names.append('classifier')

parameters = []
param_names_to_finetune = []

for k, v in model.named_parameters():
for ft_module in ft_module_names:
if ft_module in k:
parameters.append({'params': v})
param_names_to_finetune.append(k)
break
else:
param_names_to_finetune.append(k)
parameters.append({'params': v, 'lr': 0.0})

for k, v in model.named_parameters():
if k not in param_names_to_finetune:
v.requires_grad = False

return parameters
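
Parameters outside the selected modules end up in groups with an explicit lr of 0.0, so they receive no updates even though they are passed to the optimizer. An illustrative check, assuming a constructed DenseNet model; not part of this commit:

# Illustrative (not in this commit): inspect per-group learning rates after selection.
import torch.optim as optim

param_groups = get_fine_tuning_parameters(model, ft_begin_index=4)
optimizer = optim.SGD(param_groups, lr=0.001, momentum=0.9)
print(sorted({group['lr'] for group in optimizer.param_groups}))  # e.g. [0.0, 0.001]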
1 change: 0 additions & 1 deletion models/i3d.py
@@ -343,7 +343,6 @@ def trainable_params(self):
return params

def replace_logits(self, num_classes, device='cuda:0'):
print('Replacing I3D logits to {} output classes.'.format(num_classes))
self._num_classes = num_classes
self.layers['logits'] = Unit3D(
in_channels=384+384+128+128, output_channels=num_classes,
82 changes: 68 additions & 14 deletions models/resnet.py
@@ -193,28 +193,82 @@ def forward(self, x):
##########################################################################################
##########################################################################################

def get_fine_tuning_parameters(model, ft_begin_index):

assert isinstance(ft_begin_index, int)
if ft_begin_index == 0:
def get_fine_tuning_parameters(model, ft_prefixes):

assert isinstance(ft_prefixes, str)

if ft_prefixes == '':
        print('WARNING: training full network because --finetune_prefixes is empty')
return model.parameters()

ft_module_names = []
for i in range(ft_begin_index, 5):
ft_module_names.append('layer{}'.format(i))
ft_module_names.append('fc')
print('#'*60)
print('Setting finetuning layer prefixes: {}'.format(ft_prefixes))

ft_prefixes = ft_prefixes.split(',')
parameters = []
for k, v in model.named_parameters():
for ft_module in ft_module_names:
if ft_module in k:
parameters.append({'params': v})
break
else:
parameters.append({'params': v, 'lr': 0.0})
param_names = []
for param_name, param in model.named_parameters():
for prefix in ft_prefixes:
if prefix in param_name:
print(' Finetuning parameter: {}'.format(param_name))
parameters.append({'params': param, 'name': param_name})
param_names.append(param_name)

for param_name, param in model.named_parameters():
if param_name not in param_names:
            # This saves a lot of GPU memory...
print('disabling gradient for: {}'.format(param_name))
param.requires_grad = False

return parameters



# def get_fine_tuning_parameters(model, ft_begin_index):
#
# assert isinstance(ft_begin_index, int)
# if ft_begin_index == 0:
# print('WARNING: training full network because --finetune_begin_index=0')
# return model.parameters()
#
# for param_name, param in model.named_modules():
# print(param_name)
#
#
# ft_module_names = []
# for i in range(ft_begin_index, 5):
# ft_module_names.append('layer{}'.format(i))
# ft_module_names.append('fc')
#
# print('Modules to finetune: {}'.format(ft_module_names))
#
# parameters = []
# param_names_to_finetune = []
# for k, v in model.named_parameters():
# for ft_module in ft_module_names:
# if ft_module in k:
# parameters.append({'params': v, 'name': k})
# param_names_to_finetune.append(k)
# break
# else:
# parameters.append({'params': v, 'lr': 0.0, 'name': k})
# param_names_to_finetune.append(k)
#
# # Disabling gradients for frozen weights (hacky...)
# frozen_module_names = []
# for i in range(0, ft_begin_index):
# frozen_module_names.append('layer{}'.format(i))
# for k, v in model.named_parameters():
# for frozen_module in frozen_module_names:
# if frozen_module in k:
# print('disabling grad for: {}'.format(k))
# v.requires_grad = False
# model.module.conv1.requires_grad = False
# model.module.bn1.requires_grad = False
#
# return parameters

##########################################################################################
##########################################################################################

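
With the switch from a block index to comma-separated prefixes, the ResNet path now uses the same --finetune_prefixes mechanism as I3D, and parameters outside the selected prefixes get requires_grad set to False, which saves memory in the backward pass. Illustrative usage with assumed prefix values, not taken from this diff:

# Illustrative (not in this commit): finetune only the last residual stage and the classifier.
param_groups = get_fine_tuning_parameters(model, ft_prefixes='layer4,fc')
num_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Selected {} parameter groups, {} trainable weights'.format(len(param_groups), num_trainable))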
1 change: 1 addition & 0 deletions models/resnext.py
@@ -180,6 +180,7 @@ def get_fine_tuning_parameters(model, ft_begin_index):

assert isinstance(ft_begin_index, int)
if ft_begin_index == 0:
print('WARNING: training full network because --finetune_begin_index=0')
return model.parameters()

ft_module_names = []

