diff --git a/main.py b/main.py index 3121377..23922ce 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ import sys import string import time +import json import torch from torch.backends import cudnn @@ -12,49 +13,63 @@ from torch import nn from torch.utils.data import DataLoader -from torch.optim.lr_scheduler import StepLR +from torchvision.transforms import InterpolationMode from reid import datasets from reid.models import resmap -from reid.pretrainer import PreTrainer +from reid.models.qaconv import QAConv from reid.trainers import Trainer from reid.evaluators import Evaluator from reid.utils.data import transforms as T from reid.utils.data.preprocessor import Preprocessor from reid.utils.logging import Logger from reid.utils.serialization import load_checkpoint, save_checkpoint -from reid.models.qaconv import QAConv -from reid.loss.class_memory_loss import ClassMemoryLoss +from reid.utils.data.graph_sampler import GraphSampler +from reid.loss.triplet_loss import TripletLoss -def get_data(dataname, data_dir, height, width, batch_size, combine_all=False, workers=8, test_batch=64): + +def get_data(dataname, data_dir, model, matcher, save_path, args): root = osp.join(data_dir, dataname) - dataset = datasets.create(dataname, root, combine_all=combine_all) + dataset = datasets.create(dataname, root, combine_all=args.combine_all) num_classes = dataset.num_train_ids + train_transformer = T.Compose([ + T.Resize((args.height, args.width), interpolation=InterpolationMode.BICUBIC), + T.Pad(10), + T.RandomCrop((args.height, args.width)), + T.RandomHorizontalFlip(0.5), + T.RandomRotation(5), + T.ColorJitter(brightness=(0.5, 2.0), contrast=(0.5, 2.0), saturation=(0.5, 2.0), hue=(-0.1, 0.1)), + T.RandomOcclusion(args.min_size, args.max_size), + T.ToTensor(), + ]) + test_transformer = T.Compose([ - T.Resize((height, width), interpolation=3), + T.Resize((args.height, args.width), interpolation=InterpolationMode.BICUBIC), T.ToTensor(), ]) + train_path = osp.join(dataset.images_dir, dataset.train_path) train_loader = DataLoader( - Preprocessor(dataset.train, root=osp.join(dataset.images_dir, dataset.train_path), - transform=test_transformer), - batch_size=batch_size, num_workers=workers, - shuffle=True, pin_memory=True, drop_last=True) + Preprocessor(dataset.train, root=train_path, transform=train_transformer), + batch_size=args.batch_size, num_workers=args.workers, + sampler=GraphSampler(dataset.train, train_path, test_transformer, model, matcher, args.batch_size, args.num_instance, + args.test_gal_batch, args.test_prob_batch, save_path, args.gs_verbose), + pin_memory=True) query_loader = DataLoader( Preprocessor(dataset.query, root=osp.join(dataset.images_dir, dataset.query_path), transform=test_transformer), - batch_size=test_batch, num_workers=workers, + batch_size=args.test_fea_batch, num_workers=args.workers, shuffle=False, pin_memory=True) gallery_loader = DataLoader( Preprocessor(dataset.gallery, root=osp.join(dataset.images_dir, dataset.gallery_path), transform=test_transformer), - batch_size=test_batch, num_workers=workers, + batch_size=args.test_fea_batch, num_workers=args.workers, shuffle=False, pin_memory=True) return dataset, num_classes, train_loader, query_loader, gallery_loader @@ -95,17 +110,15 @@ def main(args): # Redirect print to both console and log file sys.stdout = Logger(log_file) - # Create data loaders - dataset, num_classes, train_loader, _, _ = \ - get_data(args.dataset, args.data_dir, args.height, args.width, args.batch_size, args.combine_all, - args.workers, args.test_fea_batch) - # Create model - model = resmap.create(args.arch, ibn_type=args.ibn, final_layer=args.final_layer, neck=args.neck).cuda() + ibn_type = args.ibn + if ibn_type == 'none': + ibn_type = None + model = resmap.create(args.arch, ibn_type=ibn_type, final_layer=args.final_layer, neck=args.neck).cuda() num_features = model.num_features # print(model) # print('\n') - + feamap_factor = {'layer2': 8, 'layer3': 16, 'layer4': 32} hei = args.height // feamap_factor[args.final_layer] wid = args.width // feamap_factor[args.final_layer] @@ -116,7 +129,7 @@ def main(args): print('\n') # Criterion - criterion = ClassMemoryLoss(matcher, num_classes, num_features, hei, wid, args.mem_batch_size).cuda() + criterion = TripletLoss(matcher, args.margin).cuda() # Optimizer base_param_ids = set(map(id, model.base.parameters())) @@ -125,12 +138,15 @@ def main(args): param_groups = [ {'params': model.base.parameters(), 'lr': 0.1 * args.lr}, {'params': new_params, 'lr': args.lr}, - {'params': criterion.parameters(), 'lr': args.lr}] + {'params': matcher.parameters(), 'lr': args.lr}] optimizer = torch.optim.SGD(param_groups, lr=args.lr, momentum=0.9, weight_decay=5e-4, nesterov=True) # Load from checkpoint start_epoch = 0 + base_loss = None + final_epochs = args.max_epochs + lr_stepped = False if args.resume or args.evaluate: print('Loading checkpoint...') @@ -142,64 +158,78 @@ def main(args): criterion.load_state_dict(checkpoint['criterion']) optimizer.load_state_dict(checkpoint['optim']) start_epoch = checkpoint['epoch'] - print("=> Start epoch {} ".format(start_epoch)) - elif args.pre_epochs > 0: - pre_tr = PreTrainer(model, criterion, optimizer, train_loader, args.pre_epochs, args.max_steps, args.num_trials) - result_file = osp.join(exp_database_dir, args.method, 'pretrain_metric.txt') - model, criterion, optimizer = pre_tr.train(result_file, args.method, args.sub_method) + base_loss = checkpoint['base_loss'] + final_epochs = checkpoint['final_epochs'] + lr_stepped = checkpoint['lr_stepped'] - # Decay LR by a factor of 0.1 every step_size epochs - lr_scheduler = StepLR(optimizer, step_size=args.step_size, gamma=0.1, last_epoch=start_epoch-1) + if lr_stepped: + print('Decay the learning rate by a factor of 0.1.') + for group in optimizer.param_groups: + group['lr'] *= 0.1 + + print("=> Start epoch {} ".format(start_epoch)) model = nn.DataParallel(model).cuda() - criterion = nn.DataParallel(criterion).cuda() - enhance_data_aug = False + # Create data loaders + save_path = None + if args.gs_save: + save_path = output_dir + dataset, num_classes, train_loader, _, _ = get_data(args.dataset, args.data_dir, model, matcher, save_path, args) if not args.evaluate: # Trainer - trainer = Trainer(model, criterion) - + trainer = Trainer(model, criterion, args.clip_value) t0 = time.time() + # Start training - for epoch in range(start_epoch, args.epochs): + for epoch in range(start_epoch, args.max_epochs): loss, acc = trainer.train(epoch, train_loader, optimizer) + if epoch == 1: + base_loss = loss + lr = list(map(lambda group: group['lr'], optimizer.param_groups)) - lr_scheduler.step() + train_time = time.time() - t0 + epoch1 = epoch + 1 print( '* Finished epoch %d at lr=[%g, %g, %g]. Loss: %.3f. Acc: %.2f%%. Training time: %.0f seconds. \n' - % (epoch + 1, lr[0], lr[1], lr[2], loss, acc * 100, train_time)) + % (epoch1, lr[0], lr[1], lr[2], loss, acc * 100, train_time)) + + if (not lr_stepped) and (base_loss is not None) and (loss < base_loss * args.step_factor): + lr_stepped = True + final_epochs = min(args.max_epochs, epoch1 + epoch1 // 2) + print('Decay the learning rate by a factor of 0.1. Final epochs: %d.\n' % final_epochs) + for group in optimizer.param_groups: + group['lr'] *= 0.1 save_checkpoint({ 'model': model.module.state_dict(), - 'criterion': criterion.module.state_dict(), + 'criterion': criterion.state_dict(), 'optim': optimizer.state_dict(), - 'epoch': epoch + 1, + 'epoch': epoch1, + 'final_epochs': final_epochs, + 'base_loss': base_loss, + 'lr_stepped': lr_stepped, }, fpath=osp.join(output_dir, 'checkpoint.pth.tar')) + + if epoch1 == final_epochs: + print('The learning converges at epoch %d.\n' % epoch1) + break - if not enhance_data_aug and epoch < args.epochs - 1 and acc > args.acc_thr: - enhance_data_aug = True - print('\nAcc = %.2f%% > %.2f%%. Start to Flip and Block.\n' % (acc * 100, args.acc_thr *100)) - - train_transformer = T.Compose([ - T.Resize((args.height, args.width), interpolation=3), - T.Pad(10), - T.RandomCrop((args.height, args.width)), - T.RandomHorizontalFlip(0.5), - T.RandomRotation(5), - T.ColorJitter(brightness=(0.5, 2.0), contrast=(0.5, 2.0), saturation=(0.5, 2.0), hue=(-0.1, 0.1)), - T.RandomOcclusion(args.min_size, args.max_size), - T.ToTensor(), - ]) - - train_loader = DataLoader( - Preprocessor(dataset.train, root=osp.join(dataset.images_dir, dataset.train_path), - transform=train_transformer), - batch_size=args.batch_size, num_workers=args.workers, - shuffle=True, pin_memory=True, drop_last=True) + json_file = osp.join(output_dir, 'results.json') + + if not args.evaluate: + arg_dict = {'train_dataset': args.dataset, 'exp_dir': args.exp_dir, 'method': args.method, 'sub_method': args.sub_method} + with open(json_file, 'a') as f: + json.dump(arg_dict, f) + f.write('\n') + train_dict = {'train_dataset': args.dataset, 'loss': loss, 'acc': acc, 'epochs': epoch1, 'train_time': train_time} + with open(json_file, 'a') as f: + json.dump(train_dict, f) + f.write('\n') # Final test print('Evaluate the learned model:') @@ -208,53 +238,42 @@ def main(args): # Evaluator evaluator = Evaluator(model) - avg_rank1 = 0 - avg_mAP = 0 - num_testsets = 0 - results = {} - test_names = args.testset.strip().split(',') for test_name in test_names: if test_name not in datasets.names(): print('Unknown dataset: %s.' % test_name) continue + t1 = time.time() testset, test_query_loader, test_gallery_loader = \ get_test_data(test_name, args.data_dir, args.height, args.width, args.workers, args.test_fea_batch) if not args.do_tlift: testset.has_time_info = False + test_rank1, test_mAP, test_rank1_rerank, test_mAP_rerank, test_rank1_tlift, test_mAP_tlift, test_dist, \ test_dist_rerank, test_dist_tlift, pre_tlift_dict = \ evaluator.evaluate(matcher, testset, test_query_loader, test_gallery_loader, args.test_gal_batch, args.test_prob_batch, args.tau, args.sigma, args.K, args.alpha) - results[test_name] = [test_rank1, test_mAP] - if test_name != args.dataset: - avg_rank1 += test_rank1 - avg_mAP += test_mAP - num_testsets += 1 + test_time = time.time() - t1 if testset.has_time_info: + test_dict = {'test_dataset': test_name, 'rank1': test_rank1, 'mAP': test_mAP, 'rank1_rerank': test_rank1_rerank, + 'mAP_rerank': test_mAP_rerank, 'rank1_tlift': test_rank1_tlift, 'mAP_tlift': test_mAP_tlift, 'test_time': test_time} print(' %s: rank1=%.1f, mAP=%.1f, rank1_rerank=%.1f, mAP_rerank=%.1f,' ' rank1_rerank_tlift=%.1f, mAP_rerank_tlift=%.1f.\n' % (test_name, test_rank1 * 100, test_mAP * 100, test_rank1_rerank * 100, test_mAP_rerank * 100, test_rank1_tlift * 100, test_mAP_tlift * 100)) else: + test_dict = {'test_dataset': test_name, 'rank1': test_rank1, 'mAP': test_mAP, 'test_time': test_time} print(' %s: rank1=%.1f, mAP=%.1f.\n' % (test_name, test_rank1 * 100, test_mAP * 100)) - result_file = osp.join(exp_database_dir, args.method, test_name + '_results.txt') - with open(result_file, 'a') as f: - f.write('%s/%s:\n' % (args.method, args.sub_method)) - if testset.has_time_info: - f.write('\t%s: rank1=%.1f, mAP=%.1f, rank1_rerank=%.1f, mAP_rerank=%.1f, rank1_rerank_tlift=%.1f, ' - 'mAP_rerank_tlift=%.1f.\n\n' - % (test_name, test_rank1 * 100, test_mAP * 100, test_rank1_rerank * 100, test_mAP_rerank * 100, - test_rank1_tlift * 100, test_mAP_tlift * 100)) - else: - f.write('\t%s: rank1=%.1f, mAP=%.1f.\n\n' % (test_name, test_rank1 * 100, test_mAP * 100)) - + with open(json_file, 'a') as f: + json.dump(test_dict, f) + f.write('\n') + if args.save_score: test_gal_list = np.array([fname for fname, _, _, _ in testset.gallery], dtype=np.object) test_prob_list = np.array([fname for fname, _, _, _ in testset.query], dtype=np.object) @@ -275,31 +294,12 @@ def main(args): do_compression=True) test_time = time.time() - t0 - avg_rank1 /= num_testsets - avg_mAP /= num_testsets - - for key in results.keys(): - print('%s: rank1=%.1f%%, mAP=%.1f%%.' % (key, results[key][0] * 100, results[key][1] * 100)) - print('Average: rank1=%.2f%%, mAP=%.2f%%.\n\n' % (avg_rank1 * 100, avg_mAP * 100)) - - result_file = osp.join(exp_database_dir, args.method, args.sub_method[:-5] + '_avg_results.txt') - with open(result_file, 'a') as f: - f.write('%s/%s:\n' % (args.method, args.sub_method)) - if not args.evaluate: - f.write('\t Loss: %.3f, acc: %.2f%%. ' % (loss, acc * 100)) - f.write("Train: %.0fs. " % train_time) - f.write("Test: %.0fs. " % test_time) - f.write('Rank1: %.2f%%, mAP: %.2f%%.\n' % (avg_rank1 * 100, avg_mAP * 100)) - for key in results.keys(): - f.write('\t %s: Rank1: %.1f%%, mAP: %.1f%%.\n' % - (key, results[key][0] * 100, results[key][1] * 100)) - f.write('\n') if not args.evaluate: print('Finished training at epoch %d, loss = %.3f, acc = %.2f%%.\n' - % (epoch + 1, loss, acc * 100)) + % (epoch1, loss, acc * 100)) print("Total training time: %.3f sec. Average training time per epoch: %.3f sec." % ( - train_time, train_time / (args.epochs - start_epoch + 1))) + train_time, train_time / (epoch1 - start_epoch))) print("Total testing time: %.3f sec.\n" % test_time) for arg in sys.argv: @@ -308,14 +308,14 @@ def main(args): if __name__ == '__main__': - parser = argparse.ArgumentParser(description="QAConv") + parser = argparse.ArgumentParser(description="QAConv_GS") # data parser.add_argument('-d', '--dataset', type=str, default='market', choices=datasets.names(), help="the training dataset") parser.add_argument('--combine_all', action='store_true', default=False, help="combine all data for training, default: False") parser.add_argument('--testset', type=str, default='cuhk03_np_detected,msmt', help="the test datasets") - parser.add_argument('-b', '--batch-size', type=int, default=8, help="the batch size, default: 8") + parser.add_argument('-b', '--batch-size', type=int, default=64, help="the batch size, default: 64") parser.add_argument('-j', '--workers', type=int, default=8, help="the number of workers for the dataloader, default: 8") parser.add_argument('--height', type=int, default=384, help="height of the input image, default: 384") @@ -325,9 +325,9 @@ def main(args): help="the backbone network, default: resnet50") parser.add_argument('--final_layer', type=str, default='layer3', choices=['layer2', 'layer3', 'layer4'], help="the final layer, default: layer3") - parser.add_argument('--neck', type=int, default=64, - help="number of channels for the final neck layer, default: 64") - parser.add_argument('--ibn', type=str, choices={'a', 'b'}, default=None, help="IBN type. Choose from 'a' or 'b'. Default: None") + parser.add_argument('--neck', type=int, default=128, + help="number of channels for the final neck layer, default: 128") + parser.add_argument('--ibn', type=str, choices={'a', 'b', 'none'}, default='b', help="IBN type. Choose from 'a' or 'b'. Default: 'b'") # TLift parser.add_argument('--do_tlift', action='store_true', default=False, help="apply TLift, default: False") parser.add_argument('--tau', type=float, default=100, @@ -342,38 +342,34 @@ def main(args): # random occlusion parser.add_argument('--min_size', type=float, default=0, help="minimal size for the random occlusion, default: 0") - parser.add_argument('--max_size', type=float, default=0.8, - help="maximal size for the ramdom occlusion. default: 0.8") + parser.add_argument('--max_size', type=float, default=0.8, help="maximal size for the ramdom occlusion. default: 0.8") # optimizer parser.add_argument('--lr', type=float, default=0.005, help="Learning rate of the new parameters. For pretrained " "parameters it is 10 times smaller than this. Default: 0.005.") # training configurations - parser.add_argument('--epochs', type=int, default=15, help="the number of training epochs, default: 15") - parser.add_argument('--step_size', type=int, default=10, help="step size for the learning rate decay, default: 10") - parser.add_argument('--acc_thr', type=float, default=0.6, - help="the accuracy threshold to start enhanced data augmentation during training, default: 0.6") - parser.add_argument('--mem_batch_size', type=int, default=16, - help="Batch size for the convolution with the class memory in QAConvLoss. Default: 16." - "Reduce this if you encounter a GPU memory overflow.") + parser.add_argument('--step_factor', type=float, default=0.7, help="loss descent factor to reduce the learning rate") + parser.add_argument('--max_epochs', type=int, default=60, help="the maximal number of training epochs, default: 60") parser.add_argument('--resume', type=str, default='', metavar='PATH', help="Path for resuming training. Choices: '' (new start, default), " "'ori' (original path), or a real path") - # pre-train - parser.add_argument('--pre_epochs', type=int, default=1, help="the number of epochs in pre-training, default: 1") - parser.add_argument('--max_steps', type=int, default=2000, help="the maximal pre-training steps, default: 2000") - parser.add_argument('--num_trials', type=int, default=10, help="the number of trials in pre-training, default: 10") + parser.add_argument('--clip_value', type=float, default=8, help="the gradient clip value, default: 8") + parser.add_argument('--margin', type=float, default=16, help="margin of the triplet loss, default: 16") + # graph sampler + parser.add_argument('--num_instance', type=int, default=2, help="the number of instance per class in a batch, default: 2") + parser.add_argument('--gs_save', action='store_true', default=False, help="save the graph distance and top-k indices, default: False") + parser.add_argument('--gs_verbose', action='store_true', default=False, help="verbose for the graph sampler, default: False") # test configurations parser.add_argument('--evaluate', action='store_true', default=False, help="evaluation only, default: False") - parser.add_argument('--test_fea_batch', type=int, default=64, - help="Feature extraction batch size during testing. Default: 64." + parser.add_argument('--test_fea_batch', type=int, default=256, + help="Feature extraction batch size during testing. Default: 256." "Reduce this if you encounter a GPU memory overflow.") - parser.add_argument('--test_gal_batch', type=int, default=4, - help="QAConv gallery batch size during testing. Default: 4." + parser.add_argument('--test_gal_batch', type=int, default=256, + help="QAConv gallery batch size during testing. Default: 256." "Reduce this if you encounter a GPU memory overflow.") - parser.add_argument('--test_prob_batch', type=int, default=4096, - help="QAConv probe batch size (as kernel) during testing. Default: 4096." + parser.add_argument('--test_prob_batch', type=int, default=256, + help="QAConv probe batch size (as kernel) during testing. Default: 256." "Reduce this if you encounter a GPU memory overflow.") # misc working_dir = osp.dirname(osp.abspath(__file__)) @@ -381,8 +377,8 @@ def main(args): help="the path to the image data") parser.add_argument('--exp-dir', type=str, metavar='PATH', default=osp.join(working_dir, 'Exp'), help="the path to the output directory") - parser.add_argument('--method', type=str, default='QAConv', help="method name for the output directory") - parser.add_argument('--sub_method', type=str, default='res50_layer3', + parser.add_argument('--method', type=str, default='QAConv50_IBNb_GS', help="method name for the output directory") + parser.add_argument('--sub_method', type=str, default='res50-layer3-f64_pre1_clip512_lr3_bs64-k4_ep15s10', help="sub method name for the output directory") parser.add_argument('--save_score', default=False, action='store_true', help="save the matching score or not, default: False") diff --git a/main_gs.py b/main_gs.py deleted file mode 100644 index 8f40426..0000000 --- a/main_gs.py +++ /dev/null @@ -1,379 +0,0 @@ -from __future__ import print_function, absolute_import -import argparse -import os.path as osp -import sys -import string -import time - -import torch -from torch.backends import cudnn -import numpy as np -import scipy.io as sio - -from torch import nn -from torch.utils.data import DataLoader -from torch.optim.lr_scheduler import StepLR - -from reid import datasets -from reid.models import resmap -from reid.models.qaconv import QAConv -from reid.evaluators import Evaluator -from reid.utils.data import transforms as T -from reid.utils.data.preprocessor import Preprocessor -from reid.utils.logging import Logger -from reid.utils.serialization import load_checkpoint, save_checkpoint - -from reid.utils.data.graph_sampler import GraphSampler -from reid.loss.pairwise_matching_loss import PairwiseMatchingLoss -from reid.trainers_clip import Trainer - - -def get_data(dataname, data_dir, model, matcher, last_epoch, save_path, args): - root = osp.join(data_dir, dataname) - - dataset = datasets.create(dataname, root, combine_all=args.combine_all) - - num_classes = dataset.num_train_ids - - train_transformer = T.Compose([ - T.Resize((args.height, args.width), interpolation=3), - T.Pad(10), - T.RandomCrop((args.height, args.width)), - T.RandomHorizontalFlip(0.5), - T.RandomRotation(5), - T.ColorJitter(brightness=(0.5, 2.0), contrast=(0.5, 2.0), saturation=(0.5, 2.0), hue=(-0.1, 0.1)), - T.RandomOcclusion(args.min_size, args.max_size), - T.ToTensor(), - ]) - - test_transformer = T.Compose([ - T.Resize((args.height, args.width), interpolation=3), - T.ToTensor(), - ]) - - train_path = osp.join(dataset.images_dir, dataset.train_path) - train_loader = DataLoader( - Preprocessor(dataset.train, root=train_path, transform=train_transformer), - batch_size=args.batch_size, num_workers=args.workers, - sampler=GraphSampler(dataset.train, train_path, test_transformer, model, matcher, args.batch_size, args.num_instance, - not args.gs_rerank_off, args.gs_pre_epochs, last_epoch, save_path, args.gs_verbose), - pin_memory=True) - - query_loader = DataLoader( - Preprocessor(dataset.query, - root=osp.join(dataset.images_dir, dataset.query_path), transform=test_transformer), - batch_size=args.test_fea_batch, num_workers=args.workers, - shuffle=False, pin_memory=True) - - gallery_loader = DataLoader( - Preprocessor(dataset.gallery, - root=osp.join(dataset.images_dir, dataset.gallery_path), transform=test_transformer), - batch_size=args.test_fea_batch, num_workers=args.workers, - shuffle=False, pin_memory=True) - - return dataset, num_classes, train_loader, query_loader, gallery_loader - - -def get_test_data(dataname, data_dir, height, width, workers=8, test_batch=64): - root = osp.join(data_dir, dataname) - - dataset = datasets.create(dataname, root, combine_all=False) - - test_transformer = T.Compose([ - T.Resize((height, width), interpolation=3), - T.ToTensor(), - ]) - - query_loader = DataLoader( - Preprocessor(dataset.query, - root=osp.join(dataset.images_dir, dataset.query_path), transform=test_transformer), - batch_size=test_batch, num_workers=workers, - shuffle=False, pin_memory=True) - - gallery_loader = DataLoader( - Preprocessor(dataset.gallery, - root=osp.join(dataset.images_dir, dataset.gallery_path), transform=test_transformer), - batch_size=test_batch, num_workers=workers, - shuffle=False, pin_memory=True) - - return dataset, query_loader, gallery_loader - - -def main(args): - cudnn.deterministic = False - cudnn.benchmark = True - - exp_database_dir = osp.join(args.exp_dir, string.capwords(args.dataset)) - output_dir = osp.join(exp_database_dir, args.method, args.sub_method) - log_file = osp.join(output_dir, 'log.txt') - # Redirect print to both console and log file - sys.stdout = Logger(log_file) - - # Create model - ibn_type = args.ibn - if ibn_type == 'none': - ibn_type = None - model = resmap.create(args.arch, ibn_type=ibn_type, final_layer=args.final_layer, neck=args.neck).cuda() - num_features = model.num_features - # print(model) - # print('\n') - - feamap_factor = {'layer2': 8, 'layer3': 16, 'layer4': 32} - hei = args.height // feamap_factor[args.final_layer] - wid = args.width // feamap_factor[args.final_layer] - matcher = QAConv(num_features, hei, wid).cuda() - - for arg in sys.argv: - print('%s ' % arg, end='') - print('\n') - - # Criterion - criterion = PairwiseMatchingLoss(matcher).cuda() - - # Optimizer - base_param_ids = set(map(id, model.base.parameters())) - new_params = [p for p in model.parameters() if - id(p) not in base_param_ids] - param_groups = [ - {'params': model.base.parameters(), 'lr': 0.1 * args.lr}, - {'params': new_params, 'lr': args.lr}, - {'params': matcher.parameters(), 'lr': args.lr}] - - optimizer = torch.optim.SGD(param_groups, lr=args.lr, momentum=0.9, weight_decay=5e-4, nesterov=True) - - # Load from checkpoint - start_epoch = 0 - - if args.resume or args.evaluate: - print('Loading checkpoint...') - if args.resume and (args.resume != 'ori'): - checkpoint = load_checkpoint(args.resume) - else: - checkpoint = load_checkpoint(osp.join(output_dir, 'checkpoint.pth.tar')) - model.load_state_dict(checkpoint['model']) - criterion.load_state_dict(checkpoint['criterion']) - optimizer.load_state_dict(checkpoint['optim']) - start_epoch = checkpoint['epoch'] - print("=> Start epoch {} ".format(start_epoch)) - - model = nn.DataParallel(model).cuda() - - # Create data loaders - # Warning: this training data loader cannot be used elsewhere other than a continueous training, otherwise the - # switch between the PK sampler and GS sampler will be incorrect! - save_path = None - if args.gs_save: - save_path = output_dir - dataset, num_classes, train_loader, _, _ = get_data(args.dataset, args.data_dir, model, matcher, start_epoch-1, save_path, args) - - # Decay LR by a factor of 0.1 every step_size epochs - lr_scheduler = StepLR(optimizer, step_size=args.step_size, gamma=0.1, last_epoch=start_epoch-1) - - if not args.evaluate: - # Trainer - trainer = Trainer(model, criterion, args.clip_value) - - t0 = time.time() - # Start training - for epoch in range(start_epoch, args.epochs): - loss, acc = trainer.train(epoch, train_loader, optimizer) - - lr = list(map(lambda group: group['lr'], optimizer.param_groups)) - lr_scheduler.step() - train_time = time.time() - t0 - - print( - '* Finished epoch %d at lr=[%g, %g, %g]. Loss: %.3f. Acc: %.2f%%. Training time: %.0f seconds. \n' - % (epoch + 1, lr[0], lr[1], lr[2], loss, acc * 100, train_time)) - - save_checkpoint({ - 'model': model.module.state_dict(), - 'criterion': criterion.state_dict(), - 'optim': optimizer.state_dict(), - 'epoch': epoch + 1, - }, fpath=osp.join(output_dir, 'checkpoint.pth.tar')) - - # Final test - print('Evaluate the learned model:') - t0 = time.time() - - # Evaluator - evaluator = Evaluator(model) - - avg_rank1 = 0 - avg_mAP = 0 - num_testsets = 0 - results = {} - - test_names = args.testset.strip().split(',') - for test_name in test_names: - if test_name not in datasets.names(): - print('Unknown dataset: %s.' % test_name) - continue - - testset, test_query_loader, test_gallery_loader = \ - get_test_data(test_name, args.data_dir, args.height, args.width, args.workers, args.test_fea_batch) - - if not args.do_tlift: - testset.has_time_info = False - test_rank1, test_mAP, test_rank1_rerank, test_mAP_rerank, test_rank1_tlift, test_mAP_tlift, test_dist, \ - test_dist_rerank, test_dist_tlift, pre_tlift_dict = \ - evaluator.evaluate(matcher, testset, test_query_loader, test_gallery_loader, - args.test_gal_batch, args.test_prob_batch, - args.tau, args.sigma, args.K, args.alpha) - - results[test_name] = [test_rank1, test_mAP] - if test_name != args.dataset: - avg_rank1 += test_rank1 - avg_mAP += test_mAP - num_testsets += 1 - - if testset.has_time_info: - print(' %s: rank1=%.1f, mAP=%.1f, rank1_rerank=%.1f, mAP_rerank=%.1f,' - ' rank1_rerank_tlift=%.1f, mAP_rerank_tlift=%.1f.\n' - % (test_name, test_rank1 * 100, test_mAP * 100, test_rank1_rerank * 100, test_mAP_rerank * 100, - test_rank1_tlift * 100, test_mAP_tlift * 100)) - else: - print(' %s: rank1=%.1f, mAP=%.1f.\n' % (test_name, test_rank1 * 100, test_mAP * 100)) - - result_file = osp.join(exp_database_dir, args.method, test_name + '_results.txt') - with open(result_file, 'a') as f: - f.write('%s/%s:\n' % (args.method, args.sub_method)) - if testset.has_time_info: - f.write('\t%s: rank1=%.1f, mAP=%.1f, rank1_rerank=%.1f, mAP_rerank=%.1f, rank1_rerank_tlift=%.1f, ' - 'mAP_rerank_tlift=%.1f.\n\n' - % (test_name, test_rank1 * 100, test_mAP * 100, test_rank1_rerank * 100, test_mAP_rerank * 100, - test_rank1_tlift * 100, test_mAP_tlift * 100)) - else: - f.write('\t%s: rank1=%.1f, mAP=%.1f.\n\n' % (test_name, test_rank1 * 100, test_mAP * 100)) - - if args.save_score: - test_gal_list = np.array([fname for fname, _, _, _ in testset.gallery], dtype=np.object) - test_prob_list = np.array([fname for fname, _, _, _ in testset.query], dtype=np.object) - test_gal_ids = [pid for _, pid, _, _ in testset.gallery] - test_prob_ids = [pid for _, pid, _, _ in testset.query] - test_gal_cams = [c for _, _, c, _ in testset.gallery] - test_prob_cams = [c for _, _, c, _ in testset.query] - test_score_file = osp.join(exp_database_dir, args.method, args.sub_method, '%s_score.mat' % test_name) - sio.savemat(test_score_file, {'score': 1. - test_dist, - 'score_rerank': 1. - test_dist_rerank, - 'score_tlift': 1. - test_dist_tlift, - 'gal_time': pre_tlift_dict['gal_time'], - 'prob_time': pre_tlift_dict['prob_time'], - 'gal_list': test_gal_list, 'prob_list': test_prob_list, - 'gal_ids': test_gal_ids, 'prob_ids': test_prob_ids, - 'gal_cams': test_gal_cams, 'prob_cams': test_prob_cams}, - oned_as='column', - do_compression=True) - - test_time = time.time() - t0 - avg_rank1 /= num_testsets - avg_mAP /= num_testsets - for key in results.keys(): - print('%s: rank1=%.1f%%, mAP=%.1f%%.' % (key, results[key][0] * 100, results[key][1] * 100)) - print('Average: rank1=%.2f%%, mAP=%.2f%%.\n\n' % (avg_rank1 * 100, avg_mAP * 100)) - - result_file = osp.join(exp_database_dir, args.method, args.sub_method[:-5] + '_avg_results.txt') - with open(result_file, 'a') as f: - f.write('%s/%s:\n' % (args.method, args.sub_method)) - if not args.evaluate: - f.write('\t Loss: %.3f, acc: %.2f%%. ' % (loss, acc * 100)) - f.write("Train: %.0fs. " % train_time) - f.write("Test: %.0fs. " % test_time) - f.write('Rank1: %.2f%%, mAP: %.2f%%.\n' % (avg_rank1 * 100, avg_mAP * 100)) - for key in results.keys(): - f.write('\t %s: Rank1: %.1f%%, mAP: %.1f%%.\n' % - (key, results[key][0] * 100, results[key][1] * 100)) - f.write('\n') - - if not args.evaluate: - print('Finished training at epoch %d, loss = %.3f, acc = %.2f%%.\n' - % (epoch + 1, loss, acc * 100)) - print("Total training time: %.3f sec. Average training time per epoch: %.3f sec." % ( - train_time, train_time / (args.epochs - start_epoch + 1))) - print("Total testing time: %.3f sec.\n" % test_time) - - for arg in sys.argv: - print('%s ' % arg, end='') - print('\n') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="QAConv_GS") - # data - parser.add_argument('-d', '--dataset', type=str, default='market', choices=datasets.names(), - help="the training dataset") - parser.add_argument('--combine_all', action='store_true', default=False, - help="combine all data for training, default: False") - parser.add_argument('--testset', type=str, default='cuhk03_np_detected,msmt', help="the test datasets") - parser.add_argument('-b', '--batch-size', type=int, default=64, help="the batch size, default: 64") - parser.add_argument('-j', '--workers', type=int, default=8, - help="the number of workers for the dataloader, default: 8") - parser.add_argument('--height', type=int, default=384, help="height of the input image, default: 384") - parser.add_argument('--width', type=int, default=128, help="width of the input image, default: 128") - # model - parser.add_argument('-a', '--arch', type=str, default='resnet50', choices=resmap.names(), - help="the backbone network, default: resnet50") - parser.add_argument('--final_layer', type=str, default='layer3', choices=['layer2', 'layer3', 'layer4'], - help="the final layer, default: layer3") - parser.add_argument('--neck', type=int, default=64, - help="number of channels for the final neck layer, default: 64") - parser.add_argument('--ibn', type=str, choices={'a', 'b', 'none'}, default='b', help="IBN type. Choose from 'a' or 'b'. Default: 'b'") - # TLift - parser.add_argument('--do_tlift', action='store_true', default=False, help="apply TLift, default: False") - parser.add_argument('--tau', type=float, default=100, - help="the interval threshold to define nearby persons in TLift, default: 100") - parser.add_argument('--sigma', type=float, default=200, - help="the sensitivity parameter of the time difference in TLift, default: 200") - parser.add_argument('--K', type=int, default=10, - help="parameter of the top K retrievals used to define the pivot set P in TLift, " - "default: 10") - parser.add_argument('--alpha', type=float, default=0.2, - help="regularizer for the multiplication fusion in TLift, default: 0.2") - - # random occlusion - parser.add_argument('--min_size', type=float, default=0, help="minimal size for the random occlusion, default: 0") - parser.add_argument('--max_size', type=float, default=0.8, - help="maximal size for the ramdom occlusion. default: 0.8") - # optimizer - parser.add_argument('--lr', type=float, default=0.001, - help="Learning rate of the new parameters. For pretrained " - "parameters it is 10 times smaller than this. Default: 0.001.") - # training configurations - parser.add_argument('--epochs', type=int, default=15, help="the number of training epochs, default: 15") - parser.add_argument('--step_size', type=int, default=10, help="step size for the learning rate decay, default: 10") - parser.add_argument('--resume', type=str, default='', metavar='PATH', - help="Path for resuming training. Choices: '' (new start, default), " - "'ori' (original path), or a real path") - parser.add_argument('--clip_value', type=float, default=512, help="the gradient clip value, default: 512") - # graph sampler - parser.add_argument('--num_instance', type=int, default=4, help="the number of instance per class in a batch, default: 4") - parser.add_argument('--gs_pre_epochs', type=int, default=1, help="the number of starting epochs for the random PK sampler, default: 1") - parser.add_argument('--gs_rerank_off', action='store_true', default=False, help="turn off the reranking for the graph distance, default: False") - parser.add_argument('--gs_save', action='store_true', default=False, help="save the graph distance and top-k indices, default: False") - parser.add_argument('--gs_verbose', action='store_true', default=False, help="verbose for the graph sampler, default: False") - - # test configurations - parser.add_argument('--evaluate', action='store_true', default=False, help="evaluation only, default: False") - parser.add_argument('--test_fea_batch', type=int, default=64, - help="Feature extraction batch size during testing. Default: 64." - "Reduce this if you encounter a GPU memory overflow.") - parser.add_argument('--test_gal_batch', type=int, default=4, - help="QAConv gallery batch size during testing. Default: 4." - "Reduce this if you encounter a GPU memory overflow.") - parser.add_argument('--test_prob_batch', type=int, default=4096, - help="QAConv probe batch size (as kernel) during testing. Default: 4096." - "Reduce this if you encounter a GPU memory overflow.") - # misc - working_dir = osp.dirname(osp.abspath(__file__)) - parser.add_argument('--data-dir', type=str, metavar='PATH', default=osp.join(working_dir, 'data'), - help="the path to the image data") - parser.add_argument('--exp-dir', type=str, metavar='PATH', default=osp.join(working_dir, 'Exp'), - help="the path to the output directory") - parser.add_argument('--method', type=str, default='QAConv50_IBNb_GS', help="method name for the output directory") - parser.add_argument('--sub_method', type=str, default='res50-layer3-f64_pre1_clip512_lr3_bs64-k4_ep15s10', - help="sub method name for the output directory") - parser.add_argument('--save_score', default=False, action='store_true', - help="save the matching score or not, default: False") - - main(parser.parse_args()) diff --git a/reid/evaluators.py b/reid/evaluators.py index fb080ef..1a3ef2f 100644 --- a/reid/evaluators.py +++ b/reid/evaluators.py @@ -1,10 +1,10 @@ from __future__ import print_function, absolute_import +import sys import time from collections import OrderedDict import torch import numpy as np -from .utils import to_torch from .evaluation_metrics import cmc, mean_ap from .tlift import TLift @@ -23,10 +23,9 @@ def pre_tlift(gallery, query): def extract_cnn_feature(model, inputs): model = model.cuda().eval() - inputs = to_torch(inputs).cuda() with torch.no_grad(): outputs = model(inputs) - outputs = outputs.data.cpu() + outputs = outputs.cpu() return outputs @@ -180,14 +179,24 @@ def evaluate(self, matcher, testset, query_loader, gallery_loader, gal_batch_siz prob_batch_size=4096, tau=100, sigma=200, K=10, alpha=0.2): query = testset.query gallery = testset.gallery - prob_fea, _ = extract_features(self.model, query_loader, verbose=True) - prob_fea = torch.cat([prob_fea[f].unsqueeze(0) for f, _, _, _ in query], 0) - gal_fea, _ = extract_features(self.model, gallery_loader, verbose=True) - gal_fea = torch.cat([gal_fea[f].unsqueeze(0) for f, _, _, _ in gallery], 0) - print('Compute similarity...', end='\t') + print('Compute similarity ...', end='\t') start = time.time() - dist = pairwise_distance(matcher, prob_fea, gal_fea, gal_batch_size, prob_batch_size) # [p, g] + + prob_fea, _ = extract_features(self.model, query_loader) + prob_fea = torch.cat([prob_fea[f].unsqueeze(0) for f, _, _, _ in query], 0) + num_prob = len(query) + num_gal = len(gallery) + batch_size = gallery_loader.batch_size + dist = torch.zeros(num_prob, num_gal) + + for i, (imgs, fnames, pids, _) in enumerate(gallery_loader): + print('Compute similarity %d / %d. \t' % (i + 1, len(gallery_loader)), end='\r', file=sys.stdout.console) + gal_fea = extract_cnn_feature(self.model, imgs) + g0 = i * batch_size + g1 = min(num_gal, (i + 1) * batch_size) + dist[:, g0:g1] = pairwise_distance(matcher, prob_fea, gal_fea, batch_size, prob_batch_size) # [p, g] + print('Time: %.3f seconds.' % (time.time() - start)) rank1, mAP = evaluate_all(dist, query=query, gallery=gallery) @@ -204,6 +213,8 @@ def evaluate(self, matcher, testset, query_loader, gallery_loader, gal_batch_siz dist_rerank[num_prob:, :num_prob] = dist.t() dist_rerank[:num_prob, :num_prob] = pairwise_distance(matcher, prob_fea, prob_fea, gal_batch_size, prob_batch_size) + gal_fea, _ = extract_features(self.model, gallery_loader, verbose=True) + gal_fea = torch.cat([gal_fea[f].unsqueeze(0) for f, _, _, _ in gallery], 0) dist_rerank[num_prob:, num_prob:] = pairwise_distance(matcher, gal_fea, gal_fea, gal_batch_size, prob_batch_size) diff --git a/reid/loss/triplet_loss.py b/reid/loss/triplet_loss.py new file mode 100644 index 0000000..5cb4748 --- /dev/null +++ b/reid/loss/triplet_loss.py @@ -0,0 +1,58 @@ +"""Class for the hard triplet loss + Shengcai Liao and Ling Shao, "Graph Sampling Based Deep Metric Learning for Generalizable Person Re-Identification." In arXiv preprint, arXiv:2104.01546, 2021. + Author: + Shengcai Liao + scliao@ieee.org + Version: + V1.0 + April 1, 2021 + """ + +import torch +from torch.nn import Module +from torch import nn + + +class TripletLoss(Module): + def __init__(self, matcher, margin=16): + """ + Inputs: + matcher: a class for matching pairs of images + margin: margin parameter for the triplet loss + """ + super(TripletLoss, self).__init__() + self.matcher = matcher + self.margin = margin + self.ranking_loss = nn.MarginRankingLoss(margin=margin, reduction='none') + + def reset_running_stats(self): + self.matcher.reset_running_stats() + + def reset_parameters(self): + self.matcher.reset_parameters() + + def _check_input_dim(self, input): + if input.dim() != 4: + raise ValueError('expected 4D input (got {}D input)'.format(input.dim())) + + def forward(self, feature, target): + self._check_input_dim(feature) + self.matcher.make_kernel(feature) + + score = self.matcher(feature) # [b, b] + + target1 = target.unsqueeze(1) + mask = (target1 == target1.t()) + pair_labels = mask.float() + + min_pos = torch.min(score * pair_labels + + (1 - pair_labels + torch.eye(score.size(0), device=score.device)) * 1e15, dim=1)[0] + max_neg = torch.max(score * (1 - pair_labels) - pair_labels * 1e15, dim=1)[0] + + # Compute ranking hinge loss + loss = self.ranking_loss(min_pos, max_neg, torch.ones_like(target)) + + with torch.no_grad(): + acc = (min_pos >= max_neg).float() + + return loss, acc diff --git a/reid/models/qaconv.py b/reid/models/qaconv.py index 07afee8..2057811 100644 --- a/reid/models/qaconv.py +++ b/reid/models/qaconv.py @@ -6,14 +6,13 @@ Shengcai Liao scliao@ieee.org Version: - V1.2 - Mar. 31, 2021 + V1.3 + July 1, 2021 """ import torch from torch import nn from torch.nn import Module -from torch.nn import functional as F class QAConv(Module): @@ -29,9 +28,10 @@ def __init__(self, num_features, height, width): self.height = height self.width = width self.bn = nn.BatchNorm1d(1) - self.fc = nn.Linear(self.height * self.width * 2, 1) + self.fc = nn.Linear(self.height * self.width, 1) self.logit_bn = nn.BatchNorm1d(1) self.kernel = None + self.reset_parameters() def reset_running_stats(self): self.bn.reset_running_stats() @@ -39,31 +39,30 @@ def reset_running_stats(self): def reset_parameters(self): self.bn.reset_parameters() - self.fc.reset_parameters() self.logit_bn.reset_parameters() + with torch.no_grad(): + self.fc.weight.fill_(1. / (self.height * self.width)) def _check_input_dim(self, input): if input.dim() != 4: raise ValueError('expected 4D input (got {}D input)'.format(input.dim())) def make_kernel(self, features): # probe features - kernel = features.permute([0, 2, 3, 1]) # [p, h, w, d] - kernel = kernel.reshape(-1, self.num_features, 1, 1) # [phw, d, 1, 1] - self.kernel = kernel + self.kernel = features def forward(self, features): # gallery features self._check_input_dim(features) hw = self.height * self.width batch_size = features.size(0) - - score = F.conv2d(features, self.kernel) # [g, phw, h, w] + score = torch.einsum('g c h w, p c y x -> g p y x h w', features, self.kernel) score = score.view(batch_size, -1, hw, hw) score = torch.cat((score.max(dim=2)[0], score.max(dim=3)[0]), dim=-1) - score = score.view(-1, 1, 2 * hw) - score = self.bn(score).view(-1, 2 * hw) + score = score.view(-1, 1, hw) + score = self.bn(score).view(-1, hw) score = self.fc(score) + score = score.view(-1, 2).sum(dim=-1, keepdim=True) score = self.logit_bn(score) score = score.view(batch_size, -1).t() # [p, g] diff --git a/reid/models/resmap.py b/reid/models/resmap.py index c34e46c..0fe0706 100644 --- a/reid/models/resmap.py +++ b/reid/models/resmap.py @@ -5,8 +5,8 @@ Shengcai Liao scliao@ieee.org Version: - V1.1 - Feb. 7, 2021 + V1.2 + July 4, 2021 """ from __future__ import absolute_import @@ -58,9 +58,8 @@ def __init__(self, depth, ibn_type=None, final_layer='layer3', neck=128, pretrai out_planes = fea_dims[final_layer] if neck > 0: - self.neck_conv = nn.Conv2d(out_planes, neck, kernel_size=3, padding=1, bias=False) + self.neck_conv = nn.Conv2d(out_planes, neck, kernel_size=3, padding=1) out_planes = neck - self.neck_bn = nn.BatchNorm2d(out_planes) self.num_features = out_planes @@ -73,7 +72,6 @@ def forward(self, inputs): if self.neck > 0: x = self.neck_conv(x) - x = self.neck_bn(x) x = F.normalize(x) diff --git a/reid/pretrainer.py b/reid/pretrainer.py deleted file mode 100644 index b9682f2..0000000 --- a/reid/pretrainer.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Pre-training code for a stable initialization - Shengcai Liao and Ling Shao, "Interpretable and Generalizable Person Re-Identification with Query-Adaptive - Convolution and Temporal Lifting." In The European Conference on Computer Vision (ECCV), 23-28 August, 2020. - Author: - Shengcai Liao - scliao@ieee.org - Version: - V1.0 - Feb. 7, 2021 - """ -from __future__ import print_function, absolute_import -from collections import defaultdict -import time -import sys -from copy import deepcopy - -import torch -from torch import nn - -from reid.utils.meters import AverageMeter - - -class PreTrainer(object): - def __init__(self, model, criterion, optimizer, data_loader, num_epochs=1, max_steps=2000, num_trials=10): - super().__init__() - self.model = model - self.criterion = criterion - self.optimizer = optimizer - self.data_loader = data_loader - self.num_epochs = num_epochs - self.max_steps = max_steps - self.num_trials = num_trials - self.base_model = deepcopy(model.base.state_dict()) - self.best_loss = 1e15 - self.best_acc = -1e15 - self.best_metric = -1e15 - self.best_model = None - - def train(self, result_file, method, sub_method): - print('Start pre-training.\n') - - for trial in range(self.num_trials): - model = nn.DataParallel(self.model).cuda() - criterion = nn.DataParallel(self.criterion).cuda() - - loss, acc = self.single_train(model, criterion, self.optimizer, trial) - metric = acc * 100 - loss - - if metric > self.best_metric: # cache - self.best_loss = loss - self.best_acc = acc - self.best_metric = metric - self.best_model = [deepcopy(model.module.state_dict()), - deepcopy(criterion.module.state_dict()), - deepcopy(self.optimizer.state_dict())] - - # reset states for the next trial - if trial < self.num_trials - 1: - self.model.base.load_state_dict(self.base_model) - if self.model.neck > 0: - self.model.neck_conv.reset_parameters() - self.model.neck_bn.reset_running_stats() - self.model.neck_bn.reset_parameters() - self.criterion.reset_running_stats() - self.criterion.reset_parameters() - self.optimizer.state = defaultdict(dict) - - print('Pre-training finished. Best metric: %.4f, Best loss: %.3f. Best acc: %.2f%%\n' - % (self.best_metric, self.best_loss, self.best_acc * 100)) - - with open(result_file, 'a') as f: - f.write('%s/%s:\n' % (method, sub_method)) - f.write('\tBest metric: %.4f, Best loss: %.3f. Best acc: %.2f%%\n\n' - % (self.best_metric, self.best_loss, self.best_acc * 100)) - - self.model.load_state_dict(self.best_model[0]) - self.criterion.load_state_dict(self.best_model[1]) - self.optimizer.load_state_dict(self.best_model[2]) - - return self.model, self.criterion, self.optimizer - - def single_train(self, model, criterion, optimizer, trial): - model.train() - criterion.train() - - batch_time = AverageMeter() - data_time = AverageMeter() - losses = AverageMeter() - precisions = AverageMeter() - - iters = 0 - start_time = time.time() - end = time.time() - - for ep in range(self.num_epochs): - for i, inputs in enumerate(self.data_loader): - data_time.update(time.time() - end) - - iters += 1 - inputs, targets = self._parse_data(inputs) - loss, acc = self._forward(model, criterion, inputs, targets) - - losses.update(loss.item(), targets.size(0)) - precisions.update(acc, targets.size(0)) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - batch_time.update(time.time() - end) - end = time.time() - - print('Trial {}: epoch [{}][{}/{}]. ' - 'Time: {:.3f} ({:.3f}). ' - 'Data: {:.3f} ({:.3f}). ' - 'Metric: {:.4f} ({:.4f}). ' - 'Loss: {:.3f} ({:.3f}). ' - 'Prec: {:.2%} ({:.2%}).' - .format(trial + 1, ep, i + 1, min(self.max_steps, len(self.data_loader)), - batch_time.val, batch_time.avg, - data_time.val, data_time.avg, - precisions.val / losses.val, precisions.avg / losses.avg, - losses.val, losses.avg, - precisions.val, precisions.avg), end='\r', file=sys.stdout.console) - - if iters == self.max_steps - 1: - break - - if iters == self.max_steps - 1: - break - - loss = losses.avg - acc = precisions.avg - - print( - '* Trial %d. Metric: %.4f. Loss: %.3f. Acc: %.2f%%. Training time: %.0f seconds. \n' - % (trial + 1, acc / loss, loss, acc * 100, time.time() - start_time)) - return loss, acc - - def _parse_data(self, inputs): - imgs, _, pids, _ = inputs - inputs = imgs.cuda() - targets = pids.cuda() - return inputs, targets - - def _forward(self, model, criterion, inputs, targets): - feature = model(inputs) - loss, acc = criterion(feature, targets) - loss = torch.mean(loss) - acc = torch.mean(acc) - return loss, acc diff --git a/reid/trainers.py b/reid/trainers.py index 2776a0a..38991df 100644 --- a/reid/trainers.py +++ b/reid/trainers.py @@ -1,72 +1,104 @@ -from __future__ import print_function, absolute_import -import time -import sys - -import torch -from .utils.meters import AverageMeter - - -class BaseTrainer(object): - def __init__(self, model, criterion): - super(BaseTrainer, self).__init__() - self.model = model - self.criterion = criterion - - def train(self, epoch, data_loader, optimizer): - self.model.train() - self.criterion.train() - - batch_time = AverageMeter() - data_time = AverageMeter() - losses = AverageMeter() - precisions = AverageMeter() - - end = time.time() - for i, inputs in enumerate(data_loader): - data_time.update(time.time() - end) - - inputs, targets = self._parse_data(inputs) - loss, acc = self._forward(inputs, targets) - - losses.update(loss.item(), targets.size(0)) - precisions.update(acc, targets.size(0)) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - batch_time.update(time.time() - end) - end = time.time() - - print('Epoch: [{}][{}/{}]. ' - 'Time: {:.3f} ({:.3f}). ' - 'Data: {:.3f} ({:.3f}). ' - 'Loss: {:.3f} ({:.3f}). ' - 'Prec: {:.2%} ({:.2%}).' - .format(epoch + 1, i + 1, len(data_loader), - batch_time.val, batch_time.avg, - data_time.val, data_time.avg, - losses.val, losses.avg, - precisions.val, precisions.avg), end='\r', file=sys.stdout.console) - return losses.avg, precisions.avg - - def _parse_data(self, inputs): - raise NotImplementedError - - def _forward(self, inputs, targets): - raise NotImplementedError - - -class Trainer(BaseTrainer): - def _parse_data(self, inputs): - imgs, _, pids, _ = inputs - inputs = imgs.cuda() - targets = pids.cuda() - return inputs, targets - - def _forward(self, inputs, targets): - feature = self.model(inputs) - loss, acc = self.criterion(feature, targets) - loss = torch.mean(loss) - acc = torch.mean(acc) - return loss, acc +from __future__ import print_function, absolute_import +import time +import sys + +import torch +from torch.nn.utils import clip_grad_norm_ +from .utils.meters import AverageMeter + + +class BaseTrainer(object): + def __init__(self, model, criterion, clip_value=16.0): + super(BaseTrainer, self).__init__() + self.model = model + self.criterion = criterion + self.clip_value = clip_value + + def train(self, epoch, data_loader, optimizer): + # Creates once at the beginning of training + scaler = torch.cuda.amp.GradScaler() + + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + precisions = AverageMeter() + + end = time.time() + for i, inputs in enumerate(data_loader): + self.model.eval() + self.criterion.train() + + data_time.update(time.time() - end) + inputs, targets = self._parse_data(inputs) + + optimizer.zero_grad() + + # Casts operations to mixed precision + with torch.cuda.amp.autocast(): + loss, acc = self._forward(inputs, targets) + + if loss is None: + continue + + losses.update(loss.item(), targets.size(0)) + precisions.update(acc.item(), targets.size(0)) + + if self.clip_value > 0: + # Scales the loss, and calls backward() to create scaled gradients + scaler.scale(loss).backward() + # Unscales the gradients of optimizer's assigned params in-place + scaler.unscale_(optimizer) + else: + loss.backward() + + clip_grad_norm_(self.model.parameters(), self.clip_value) + clip_grad_norm_(self.criterion.parameters(), self.clip_value) + + if self.clip_value > 0: + # Unscales gradients and calls or skips optimizer.step() + scaler.step(optimizer) + # Updates the scale for next iteration + scaler.update() + else: + optimizer.step() + + batch_time.update(time.time() - end) + end = time.time() + + print('Epoch: [{}][{}/{}]. ' + 'Time: {:.3f} ({:.3f}). ' + 'Data: {:.3f} ({:.3f}). ' + 'Loss: {:.3f} ({:.3f}). ' + 'Prec: {:.2%} ({:.2%}).' + .format(epoch + 1, i + 1, len(data_loader), + batch_time.val, batch_time.avg, + data_time.val, data_time.avg, + losses.val, losses.avg, + precisions.val, precisions.avg), end='\r', file=sys.stdout.console) + + return losses.avg, precisions.avg + + def _parse_data(self, inputs): + raise NotImplementedError + + def _forward(self, inputs, targets): + raise NotImplementedError + + +class Trainer(BaseTrainer): + def _parse_data(self, inputs): + imgs, _, pids, _ = inputs + inputs = imgs.cuda() + targets = pids.cuda() + return inputs, targets + + def _forward(self, inputs, targets): + feature = self.model(inputs) + loss, acc = self.criterion(feature, targets) + finite_mask = loss.isfinite() + if finite_mask.any(): + loss = loss[finite_mask].mean() + acc = acc[finite_mask].mean() + else: + loss = acc = None + return loss, acc diff --git a/reid/trainers_clip.py b/reid/trainers_clip.py deleted file mode 100644 index 38a4f95..0000000 --- a/reid/trainers_clip.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import print_function, absolute_import -import time -import sys - -import torch -from torch.nn.utils import clip_grad_norm_ -from .utils.meters import AverageMeter - - -class BaseTrainer(object): - def __init__(self, model, criterion, clip_value=512.0): - super(BaseTrainer, self).__init__() - self.model = model - self.criterion = criterion - self.clip_value = clip_value - - def train(self, epoch, data_loader, optimizer): - self.model.train() - self.criterion.train() - - batch_time = AverageMeter() - data_time = AverageMeter() - losses = AverageMeter() - precisions = AverageMeter() - - end = time.time() - for i, inputs in enumerate(data_loader): - data_time.update(time.time() - end) - - inputs, targets = self._parse_data(inputs) - loss, acc = self._forward(inputs, targets) - - losses.update(loss.item(), targets.size(0)) - precisions.update(acc.item(), targets.size(0)) - - optimizer.zero_grad() - loss.backward() - - clip_grad_norm_(self.model.parameters(), self.clip_value) - clip_grad_norm_(self.criterion.parameters(), self.clip_value) - - optimizer.step() - - batch_time.update(time.time() - end) - end = time.time() - - print('Epoch: [{}][{}/{}]. ' - 'Time: {:.3f} ({:.3f}). ' - 'Data: {:.3f} ({:.3f}). ' - 'Loss: {:.3f} ({:.3f}). ' - 'Prec: {:.2%} ({:.2%}).' - .format(epoch + 1, i + 1, len(data_loader), - batch_time.val, batch_time.avg, - data_time.val, data_time.avg, - losses.val, losses.avg, - precisions.val, precisions.avg), end='\r', file=sys.stdout.console) - - return losses.avg, precisions.avg - - def _parse_data(self, inputs): - raise NotImplementedError - - def _forward(self, inputs, targets): - raise NotImplementedError - - -class Trainer(BaseTrainer): - def _parse_data(self, inputs): - imgs, _, pids, _ = inputs - inputs = imgs.cuda() - targets = pids.cuda() - return inputs, targets - - def _forward(self, inputs, targets): - feature = self.model(inputs) - loss, acc = self.criterion(feature, targets) - loss = torch.mean(loss) - acc = torch.mean(acc) - return loss, acc diff --git a/reid/utils/data/graph_sampler.py b/reid/utils/data/graph_sampler.py index 5e3f924..7583df7 100644 --- a/reid/utils/data/graph_sampler.py +++ b/reid/utils/data/graph_sampler.py @@ -4,8 +4,8 @@ Shengcai Liao scliao@ieee.org Version: - V1.0 - April 1, 2021 + V1.1 + July 15, 2021 """ from __future__ import absolute_import @@ -20,12 +20,12 @@ from torch.utils.data.sampler import Sampler from .preprocessor import Preprocessor -from reid.evaluators import extract_features, pairwise_distance, reranking +from reid.evaluators import extract_features, pairwise_distance class GraphSampler(Sampler): def __init__(self, data_source, img_path, transformer, model, matcher, batch_size=64, num_instance=4, - rerank=True, pre_epochs=1, last_epoch=-1, save_path=None, verbose=False): + gal_batch_size=256, prob_batch_size=256, save_path=None, verbose=False): super(GraphSampler, self).__init__(data_source) self.data_source = data_source self.img_path = img_path @@ -34,9 +34,8 @@ def __init__(self, data_source, img_path, transformer, model, matcher, batch_siz self.matcher = matcher self.batch_size = batch_size self.num_instance = num_instance - self.rerank = rerank - self.pre_epochs = pre_epochs - self.last_epoch = last_epoch + self.gal_batch_size = gal_batch_size + self.prob_batch_size = prob_batch_size self.save_path = save_path self.verbose = verbose @@ -50,38 +49,12 @@ def __init__(self, data_source, img_path, transformer, model, matcher, batch_siz self.sam_index = None self.sam_pointer = [0] * self.num_pids - self.epoch = last_epoch + 1 def make_index(self): - if self.epoch < self.pre_epochs: - self.random_index() - else: - start = time.time() - self.graph_index() - if self.verbose: - print('\t GraphSampler: \tTotal GS time for epoch %d: %.3f seconds.\n' % (self.epoch + 1, time.time() - start)) - self.epoch += 1 - - def random_index(self): - sam_index = [] - for pid in self.pids: - index = self.index_dic[pid].copy() - batches = len(index) // self.num_instance - if batches == 0: - more = np.random.choice(index, size=self.num_instance-len(index), replace=True) - index.extend(more) - batches = 1 - shuffle(index) - if batches > self.batch_size: - index = index[: self.batch_size * self.num_instance] - else: - index = index[: batches * self.num_instance] - sam_index.extend(index) - sam_index = np.array(sam_index) - sam_index = sam_index.reshape((-1, self.num_instance)) - np.random.shuffle(sam_index) - sam_index = list(sam_index.flatten()) - self.sam_index = sam_index + start = time.time() + self.graph_index() + if self.verbose: + print('\nTotal GS time: %.3f seconds.\n' % (time.time() - start)) def calc_distance(self, dataset): data_loader = DataLoader( @@ -97,22 +70,11 @@ def calc_distance(self, dataset): if self.verbose: print('\t GraphSampler: \tCompute distance...', end='\t') start = time.time() - dist = pairwise_distance(self.matcher, features, features) + dist = pairwise_distance(self.matcher, features, features, self.gal_batch_size, self.prob_batch_size) + if self.verbose: print('Time: %.3f seconds.' % (time.time() - start)) - if self.rerank: - if self.verbose: - print('\t GraphSampler: \tRerank...', end='\t') - start = time.time() - with torch.no_grad(): - dist = torch.cat((dist, dist)) - dist = torch.cat((dist, dist), dim=1) - dist = reranking(dist, self.num_pids) - dist = torch.from_numpy(dist).cuda() - if self.verbose: - print('Time: %.3f seconds.' % (time.time() - start)) - return dist def graph_index(self):