Merge pull request #8 from tomrunia/regression
UCF-101 dataset mean and normalization changes
tomrunia authored Nov 14, 2018
2 parents 89f89d4 + d2a9797 commit e6cc440
Showing 11 changed files with 169 additions and 76 deletions.
9 changes: 7 additions & 2 deletions config.py
@@ -14,7 +14,10 @@ def parse_opts():
parser.add_argument('--dataset', type=str, required=True, help='Dataset string (kinetics | activitynet | ucf101 | blender)')
parser.add_argument('--num_val_samples', type=int, default=1, help='Number of validation samples for each activity')
parser.add_argument('--norm_value', default=255, type=int, help='Divide inputs by 255 or 1')
parser.add_argument('--no_dataset_mean', action='store_true', help='Do not use the dataset mean; normalize to zero mean instead')
parser.add_argument('--no_dataset_std', action='store_true', help='Do not use the dataset std; normalize to unit std instead')
parser.add_argument('--num_classes', default=400, type=int, help= 'Number of classes (activitynet: 200, kinetics: 400, ucf101: 101, hmdb51: 51)')
parser.set_defaults(no_dataset_std=True)

# Preprocessing pipeline
parser.add_argument('--spatial_size', default=224, type=int, help='Height and width of inputs')
@@ -38,7 +41,8 @@ def parse_opts():
parser.add_argument('--checkpoint_path', default='', type=str, help='Checkpoint file (.pth) of previous training')
parser.add_argument('--finetune_num_classes', default=36, type=int, help='Number of classes for fine-tuning (num_classes should match the pretraining dataset).')
parser.add_argument('--finetune_prefixes', default='logits,Mixed_5', type=str, help='Prefixes of layers to finetune, comma-separated (used by I3D and ResNet).')
parser.add_argument('--finetune_begin_index', default=0, type=int, help='Begin block index of fine-tuning (not used by I3D).')
parser.add_argument('--finetune_begin_index', default=4, type=int, help='Begin block index of fine-tuning (not used by I3D).')
parser.add_argument('--finetune_restore_optimizer', action='store_true', help='Whether to restore optimizer state')

# Optimization
parser.add_argument('--optimizer', default='adam', type=str, help='Which optimizer to use (SGD | adam | rmsprop)')
@@ -58,12 +62,13 @@ def parse_opts():
parser.add_argument('--checkpoint_frequency', type=int, default=1, help='Save checkpoint after this number of epochs')
parser.add_argument('--checkpoints_num_keep', type=int, default=5, help='Number of checkpoints to keep')
parser.add_argument('--log_frequency', type=int, default=5, help='Logging frequency in number of steps')
parser.add_argument('--log_image_frequency', type=int, default=200, help='Image logging frequency in number of steps')
parser.add_argument('--no_tensorboard', action='store_true', default=False, help='Disable the use of TensorboardX')

# Misc
parser.add_argument('--device', default='cuda:0', help='Device string cpu | cuda:0')
parser.add_argument('--history_steps', default=25, type=int, help='History of running average meters')
parser.add_argument('--num_workers', default=4, type=int, help='Number of threads for multi-thread loading')
parser.add_argument('--num_workers', default=6, type=int, help='Number of threads for multi-thread loading')
parser.add_argument('--no_eval', action='store_true', default=False, help='Disable evaluation')

return parser.parse_args()
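
A note on the normalization flags added above: --no_dataset_mean and --no_dataset_std (the latter now defaulting to True) choose between dataset statistics and plain zero-mean / unit-std scaling. Below is a minimal sketch of how the flags might translate into the (mean, std) pair for a Normalize transform; it is not part of this commit and the statistics shown are placeholder values.

# Sketch (not in this commit): map the two flags to normalization statistics.
def resolve_normalization(no_dataset_mean, no_dataset_std, dataset_mean, dataset_std):
    mean = [0.0, 0.0, 0.0] if no_dataset_mean else dataset_mean
    std = [1.0, 1.0, 1.0] if no_dataset_std else dataset_std
    return mean, std

# With the new default (no_dataset_std=True) only the dataset mean is subtracted.
mean, std = resolve_normalization(False, True, [114.8, 107.3, 99.7], [38.8, 37.7, 40.0])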
15 changes: 6 additions & 9 deletions datasets/ucf101.py
@@ -11,14 +11,15 @@

from utils.utils import load_value_file

##########################################################################################
##########################################################################################

def pil_loader(path):
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
with open(path, 'rb') as f:
with Image.open(f) as img:
return img.convert('RGB')


def accimage_loader(path):
try:
import accimage
@@ -27,15 +28,13 @@ def accimage_loader(path):
# Potentially a decoding problem, fall back to PIL.Image
return pil_loader(path)


def get_default_image_loader():
from torchvision import get_image_backend
if get_image_backend() == 'accimage':
return accimage_loader
else:
return pil_loader


def video_loader(video_dir_path, frame_indices, image_loader):
video = []
for i in frame_indices:
@@ -47,17 +46,14 @@ def video_loader(video_dir_path, frame_indices, image_loader):

return video


def get_default_video_loader():
image_loader = get_default_image_loader()
return functools.partial(video_loader, image_loader=image_loader)


def load_annotation_data(data_file_path):
with open(data_file_path, 'r') as data_file:
return json.load(data_file)


def get_class_labels(data):
class_labels_map = {}
index = 0
@@ -66,7 +62,6 @@ def get_class_labels(data):
index += 1
return class_labels_map


def get_video_names_and_annotations(data, subset):
video_names = []
annotations = []
@@ -80,6 +75,8 @@ def get_video_names_and_annotations(data, subset):

return video_names, annotations

##########################################################################################
##########################################################################################

def make_dataset(root_path, annotation_path, subset, n_samples_for_each_video,
sample_duration):
@@ -143,8 +140,8 @@ def make_dataset(root_path, annotation_path, subset, n_samples_for_each_video,

return dataset, idx_to_class

############################################################################
############################################################################
##########################################################################################
##########################################################################################

class UCF101(data.Dataset):
"""
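
For context, the loader chain above (get_default_image_loader, video_loader, get_default_video_loader) is typically used as in the sketch below. The clip directory and frame indices are illustrative only; the per-frame filename pattern lives in a collapsed part of this diff.

# Illustrative usage (not in this commit): load four frames of one UCF-101 clip.
loader = get_default_video_loader()  # picks accimage if available, else PIL
frames = loader('/data/ucf101/jpg/ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01',
                frame_indices=[1, 2, 3, 4])
print('Loaded {} RGB frames'.format(len(frames)))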
22 changes: 21 additions & 1 deletion epoch_iterators.py
@@ -45,7 +45,7 @@ def train_epoch(config, model, criterion, optimizer, device,
optimizer.zero_grad()

# Move inputs to GPU memory
clips = clips.to(device)
clips = clips.to(device)
targets = targets.to(device)
if config.model == 'i3d':
targets = torch.unsqueeze(targets, -1)
@@ -97,6 +97,16 @@ def train_epoch(config, model, criterion, optimizer, device,
summary_writer.add_scalar('train/learning_rate', current_learning_rate(optimizer), global_step)
summary_writer.add_scalar('train/weight_decay', current_weight_decay(optimizer), global_step)

if summary_writer and step % config.log_image_frequency == 0:
# TensorboardX video summary
for example_idx in range(4):
clip_for_display = clips[example_idx].clone().cpu()
min_val = float(clip_for_display.min())
max_val = float(clip_for_display.max())
clip_for_display.clamp_(min=min_val, max=max_val)
clip_for_display.add_(-min_val).div_(max_val - min_val + 1e-5)
summary_writer.add_video('train_clips/{:04d}'.format(example_idx), clip_for_display.unsqueeze(0), global_step)

# Epoch statistics
epoch_duration = float(time.time() - epoch_start_time)
epoch_avg_loss = np.mean(losses)
@@ -159,6 +169,16 @@ def validation_epoch(config, model, criterion, device, data_loader, epoch, summa
step, steps_in_epoch, examples_per_second,
accuracies[step], losses[step]))

if summary_writer and step == 0:
# TensorboardX video summary
for example_idx in range(4):
clip_for_display = clips[example_idx].clone().cpu()
min_val = float(clip_for_display.min())
max_val = float(clip_for_display.max())
clip_for_display.clamp_(min=min_val, max=max_val)
clip_for_display.add_(-min_val).div_(max_val - min_val + 1e-5)
summary_writer.add_video('validation_clips/{:04d}'.format(example_idx), clip_for_display.unsqueeze(0), epoch*steps_in_epoch)

# Epoch statistics
epoch_duration = float(time.time() - epoch_start_time)
epoch_avg_loss = np.mean(losses)
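
The same min-max rescaling is duplicated in the training and validation loops above (the preceding clamp to the clip's own min and max is effectively a no-op). A small helper along the lines of the sketch below could factor it out; this is not part of the commit, and depending on the tensorboardX version add_video may expect (N, T, C, H, W), so a permute could be needed before the unsqueeze.

# Sketch (not in this commit): rescale a clip tensor to [0, 1] for display.
def normalize_clip_for_display(clip, eps=1e-5):
    clip = clip.clone().cpu()
    min_val, max_val = float(clip.min()), float(clip.max())
    return (clip - min_val) / (max_val - min_val + eps)

# Possible call site inside the logging branch:
#   summary_writer.add_video('train_clips/0000',
#                            normalize_clip_for_display(clips[0]).unsqueeze(0),
#                            global_step)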
4 changes: 2 additions & 2 deletions factory/data_factory.py
@@ -209,8 +209,8 @@ def get_data_loaders(config, train_transforms, validation_transforms=None):
if not config.no_eval and validation_transforms:

dataset_validation = get_validation_set(
config, train_transforms['spatial'],
train_transforms['temporal'], train_transforms['target'])
config, validation_transforms['spatial'],
validation_transforms['temporal'], validation_transforms['target'])

print('Found {} validation examples'.format(len(dataset_validation)))

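
This change builds the validation set from its own transforms instead of the training ones, so training-time augmentation does not leak into evaluation. A minimal sketch of the intended call site, assuming both dictionaries carry the 'spatial', 'temporal' and 'target' keys used above and that the individual transform objects are defined elsewhere:

# Sketch (not in this commit): separate transform dicts for train and validation.
train_transforms = {'spatial': train_spatial, 'temporal': train_temporal, 'target': target_transform}
validation_transforms = {'spatial': val_spatial, 'temporal': val_temporal, 'target': target_transform}
loaders = get_data_loaders(config, train_transforms, validation_transforms)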
29 changes: 8 additions & 21 deletions factory/model_factory.py
@@ -177,7 +177,9 @@ def get_model(config):
print('Moving model to CUDA device...')
# Move model to the GPU
model = model.cuda()
#model = nn.DataParallel(model, device_ids=None)

if config.model != 'i3d':
model = nn.DataParallel(model, device_ids=None)

if config.checkpoint_path:

@@ -194,6 +196,8 @@

# Setup finetuning layer for different number of classes
        # Note: DataParallel wraps the model in a 'module' attribute, which complicates things...
print('Replacing model logits with {} output classes.'.format(config.finetune_num_classes))

if config.model == 'i3d':
model.replace_logits(config.finetune_num_classes)
elif config.model == 'densenet':
@@ -204,29 +208,12 @@
model.module.fc = model.module.fc.cuda()

# Setup which layers to train
finetune_criterion = config.finetune_prefixes if config.model == 'i3d' else config.finetune_begin_index
assert config.model in ('i3d', 'resnet'), 'finetune params not implemented...'
finetune_criterion = config.finetune_prefixes if config.model in ('i3d', 'resnet') else config.finetune_begin_index
parameters_to_train = get_fine_tuning_parameters(model, finetune_criterion)

return model, parameters_to_train
else:

if config.checkpoint_path:

print('Loading pretrained model {}'.format(config.checkpoint_path))
assert os.path.isfile(config.checkpoint_path)

checkpoint = torch.load(config.checkpoint_path)
model.load_state_dict(checkpoint['state_dict'])

if config.model == 'densenet':
model.classifier = nn.Linear(model.classifier.in_features, config.finetune_num_classes)
else:
model.fc = nn.Linear(model.fc.in_features, config.finetune_num_classes)

# Setup which layers to train
finetune_criterion = config.finetune_prefixes if config.model == 'i3d' else config.finetune_begin_index
parameters_to_train = get_fine_tuning_parameters(model, finetune_criterion)

return model, parameters_to_train
raise ValueError('CPU training not supported.')

return model, model.parameters()
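
The parameter groups returned by get_fine_tuning_parameters are what the optimizer should receive so that frozen layers stay untouched. A sketch of that handoff is below; the learning-rate and weight-decay option names are assumptions, not taken from this diff.

# Sketch (not in this commit): hand the selected parameter groups to the optimizer.
import torch.optim as optim

model, parameters_to_train = get_model(config)
optimizer = optim.Adam(parameters_to_train, lr=config.learning_rate,
                       weight_decay=config.weight_decay)  # option names assumed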
9 changes: 9 additions & 0 deletions models/densenet.py
@@ -197,6 +197,7 @@ def get_fine_tuning_parameters(model, ft_begin_index):

assert isinstance(ft_begin_index, int)
if ft_begin_index == 0:
print('WARNING: training full network because --finetune_begin_index=0')
return model.parameters()

ft_module_names = []
@@ -207,12 +208,20 @@
ft_module_names.append('classifier')

parameters = []
param_names_to_finetune = []

for k, v in model.named_parameters():
for ft_module in ft_module_names:
if ft_module in k:
parameters.append({'params': v})
param_names_to_finetune.append(k)
break
else:
param_names_to_finetune.append(k)
parameters.append({'params': v, 'lr': 0.0})

for k, v in model.named_parameters():
if k not in param_names_to_finetune:
v.requires_grad = False

return parameters
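
Parameters outside the selected modules end up in groups with an explicit lr of 0.0, so they receive no updates even though they are passed to the optimizer. An illustrative check, assuming a constructed DenseNet model; not part of this commit:

# Illustrative (not in this commit): inspect per-group learning rates after selection.
import torch.optim as optim

param_groups = get_fine_tuning_parameters(model, ft_begin_index=4)
optimizer = optim.SGD(param_groups, lr=0.001, momentum=0.9)
print(sorted({group['lr'] for group in optimizer.param_groups}))  # e.g. [0.0, 0.001]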
1 change: 0 additions & 1 deletion models/i3d.py
@@ -343,7 +343,6 @@ def trainable_params(self):
return params

def replace_logits(self, num_classes, device='cuda:0'):
print('Replacing I3D logits to {} output classes.'.format(num_classes))
self._num_classes = num_classes
self.layers['logits'] = Unit3D(
in_channels=384+384+128+128, output_channels=num_classes,
82 changes: 68 additions & 14 deletions models/resnet.py
@@ -193,28 +193,82 @@ def forward(self, x):
##########################################################################################
##########################################################################################

def get_fine_tuning_parameters(model, ft_begin_index):

assert isinstance(ft_begin_index, int)
if ft_begin_index == 0:
def get_fine_tuning_parameters(model, ft_prefixes):

assert isinstance(ft_prefixes, str)

if ft_prefixes == '':
        print('WARNING: training full network because --finetune_prefixes is empty')
return model.parameters()

ft_module_names = []
for i in range(ft_begin_index, 5):
ft_module_names.append('layer{}'.format(i))
ft_module_names.append('fc')
print('#'*60)
print('Setting finetuning layer prefixes: {}'.format(ft_prefixes))

ft_prefixes = ft_prefixes.split(',')
parameters = []
for k, v in model.named_parameters():
for ft_module in ft_module_names:
if ft_module in k:
parameters.append({'params': v})
break
else:
parameters.append({'params': v, 'lr': 0.0})
param_names = []
for param_name, param in model.named_parameters():
for prefix in ft_prefixes:
if prefix in param_name:
print(' Finetuning parameter: {}'.format(param_name))
parameters.append({'params': param, 'name': param_name})
param_names.append(param_name)

for param_name, param in model.named_parameters():
if param_name not in param_names:
            # This saves a lot of GPU memory...
print('disabling gradient for: {}'.format(param_name))
param.requires_grad = False

return parameters



# def get_fine_tuning_parameters(model, ft_begin_index):
#
# assert isinstance(ft_begin_index, int)
# if ft_begin_index == 0:
# print('WARNING: training full network because --finetune_begin_index=0')
# return model.parameters()
#
# for param_name, param in model.named_modules():
# print(param_name)
#
#
# ft_module_names = []
# for i in range(ft_begin_index, 5):
# ft_module_names.append('layer{}'.format(i))
# ft_module_names.append('fc')
#
# print('Modules to finetune: {}'.format(ft_module_names))
#
# parameters = []
# param_names_to_finetune = []
# for k, v in model.named_parameters():
# for ft_module in ft_module_names:
# if ft_module in k:
# parameters.append({'params': v, 'name': k})
# param_names_to_finetune.append(k)
# break
# else:
# parameters.append({'params': v, 'lr': 0.0, 'name': k})
# param_names_to_finetune.append(k)
#
# # Disabling gradients for frozen weights (hacky...)
# frozen_module_names = []
# for i in range(0, ft_begin_index):
# frozen_module_names.append('layer{}'.format(i))
# for k, v in model.named_parameters():
# for frozen_module in frozen_module_names:
# if frozen_module in k:
# print('disabling grad for: {}'.format(k))
# v.requires_grad = False
# model.module.conv1.requires_grad = False
# model.module.bn1.requires_grad = False
#
# return parameters

##########################################################################################
##########################################################################################

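
With the switch from a block index to comma-separated prefixes, the ResNet path now uses the same --finetune_prefixes mechanism as I3D, and parameters outside the selected prefixes get requires_grad set to False, which saves memory in the backward pass. Illustrative usage with assumed prefix values, not taken from this diff:

# Illustrative (not in this commit): finetune only the last residual stage and the classifier.
param_groups = get_fine_tuning_parameters(model, ft_prefixes='layer4,fc')
num_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Selected {} parameter groups, {} trainable weights'.format(len(param_groups), num_trainable))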
1 change: 1 addition & 0 deletions models/resnext.py
@@ -180,6 +180,7 @@ def get_fine_tuning_parameters(model, ft_begin_index):

assert isinstance(ft_begin_index, int)
if ft_begin_index == 0:
print('WARNING: training full network because --finetune_begin_index=0')
return model.parameters()

ft_module_names = []

