Skip to content


Added ensemble benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
IDMIPPM committed Jul 13, 2020
1 parent 5651ae7 commit a0bbe6d
Show file tree
Hide file tree
Showing 2 changed files with 389 additions and 0 deletions.
47 changes: 47 additions & 0 deletions benchmark/
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
## COCO benchmark

Here you can find predictions for COCO validation from different freely available pretrained object detection models:
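* [EfficientDet](https://github.com/google/automl/tree/master/efficientdet) [[1](https://arxiv.org/abs/1911.09070)]
* [DetectoRS](https://github.com/joe-siyuan-qiao/DetectoRS) [[2](https://arxiv.org/abs/2006.02334)]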

| Model | COCO validation mAP(0.5...0.95) | COCO validation mAP(0.5...0.95) Mirror |
| ------ | --------------- | --------------- |
| EffNet-B0 | **33.6** | **33.5** |
| EffNet-B1 | **39.2** | **39.2** |
| EffNet-B2 | **42.5** | **42.6** |
| EffNet-B3 | **45.9** | **45.5** |
| EffNet-B4 | **49.0** | **48.8** |
| EffNet-B5 | **50.5** | **50.2** |
| EffNet-B6 | **51.3** | **51.1** |
| EffNet-B7 | **52.1** | **51.9** |
| DetectoRS + ResNeXt-101 | **51.5** | **51.5** |
| DetectoRS + Resnet50 | **49.6** | **49.6** |

### Benchmark files

[Download ~240 MB]()

## Ensemble results

The Python script in this directory reproduces a high score on COCO validation using the WBF method.

WBF with weights: [0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 5, 5, 7, 7, 9, 9, 8, 8, 5, 5] and IoU = 0.7 gives **55.8** on COCO validation.

Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.558
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.740
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.616
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.399
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.605
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.702
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.404
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.681
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.748
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.619
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.788
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.875

## Requirements

numpy, pandas, pycocotools, ensemble_boxes
342 changes: 342 additions & 0 deletions benchmark/
Original file line number Diff line number Diff line change
@@ -0,0 +1,342 @@
# coding: utf-8
__author__ = 'ZFTurbo:'

import numpy as np
import pandas as pd
import json
import time
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from multiprocessing import Pool, Process, cpu_count, Manager
from ensemble_boxes import *

def get_coco_annotations_data(file_in='instances_val2017.json'):
    """Load image metadata from a COCO-format annotation file.

    :param file_in: path to the annotation JSON file. Defaults to
        ``instances_val2017.json`` in the current working directory, which
        is the path the script has always used.
    :return: dict mapping each image ``id`` to its complete entry from the
        ``images`` list of the annotation file. If an id occurs more than
        once, the last entry wins.
    """
    with open(file_in) as json_file:
        data = json.load(json_file)
    return {image['id']: image for image in data['images']}

def get_coco_score(csv_path):
    """Evaluate a CSV file with predictions against COCO validation annotations.

    The CSV must contain the columns ``img_id``, ``label``, ``score``, ``x1``,
    ``y1``, ``x2`` and ``y2``. Box coordinates are multiplied by the image
    width/height, i.e. they are treated as relative coordinates.

    :param csv_path: path to the CSV file with predictions.
    :return: tuple ``(coco_metrics, detections)`` where ``coco_metrics`` is
        ``COCOeval.stats`` after evaluation and ``detections`` is the
        ``(N, 7)`` float array passed to ``COCO.loadRes`` with columns
        ``[image_id, x, y, width, height, score, label]``.
    """
    images = get_coco_annotations_data()
    # np.str was only an alias of the builtin and no longer exists in recent NumPy.
    s = pd.read_csv(csv_path, dtype={'img_id': str, 'label': str})

    ids = s['img_id'].astype(np.int32).values
    widths = np.array([images[int(i)]['width'] for i in ids], dtype=np.float64)
    heights = np.array([images[int(i)]['height'] for i in ids], dtype=np.float64)
    x1 = s['x1'].values
    x2 = s['x2'].values
    y1 = s['y1'].values
    y2 = s['y2'].values

    # Convert relative (x1, y1, x2, y2) into absolute (x, y, width, height).
    out = np.zeros((len(s), 7), dtype=np.float64)
    out[:, 0] = ids
    out[:, 1] = x1 * widths
    out[:, 2] = y1 * heights
    out[:, 3] = (x2 - x1) * widths
    out[:, 4] = (y2 - y1) * heights
    out[:, 5] = s['score'].values
    out[:, 6] = s['label'].astype(np.float64).values

    filename = 'instances_val2017.json'
    coco_gt = COCO(filename)
    detections = out
    image_ids = list(set(detections[:, 0]))
    coco_dt = coco_gt.loadRes(detections)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.params.imgIds = image_ids
    # stats is only filled in by summarize(), which needs the two steps before it.
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    coco_metrics = coco_eval.stats
    return coco_metrics, detections

def process_single_id(id, res_boxes, weights, params):
    """Merge the predictions of all models for a single image.

    :param id: image identifier, a key of ``res_boxes``.
    :param res_boxes: dict mapping an image id to a list with one entry per
        model; every entry is a sequence of rows
        ``(x1, y1, x2, y2, score, label)``.
    :param weights: per-model weights forwarded to the merging function.
    :param params: settings dict. ``run_type`` selects the method (``'wbf'``,
        ``'nms'``, ``'soft-nms'`` or ``'nmw'``). Depending on the method the
        keys ``intersection_thr``, ``skip_box_thr``, ``conf_type``,
        ``iou_thr``, ``sigma`` and ``thresh`` are read. Optional keys:
        ``verbose`` (default ``False``) and ``limit_boxes``.
    :return: tuple ``(boxes, scores, labels, ids_list)``; ``labels`` holds the
        original label strings and ``ids_list`` repeats ``id`` once per box.
    :raises ValueError: if ``run_type`` is not a supported method.
    """
    run_type = params['run_type']
    # Optional, consistent with ensemble_predictions().
    verbose = params.get('verbose', False)

    boxes_list = []
    scores_list = []
    labels_list = []
    # Labels are passed to the merging functions as integer indices; these
    # two maps translate the original labels to indices and back.
    labels_to_use_forward = dict()
    labels_to_use_backward = dict()

    for dt in res_boxes[id]:
        boxes = []
        scores = []
        labels = []

        for row in dt:
            lbl = row[5]
            scr = float(row[4])
            box_x1 = float(row[0])
            box_y1 = float(row[1])
            box_x2 = float(row[2])
            box_y2 = float(row[3])

            if box_x1 >= box_x2:
                if verbose:
                    print('Problem with box x1 and x2: {}. Skip it'.format(row))
                continue
            if box_y1 >= box_y2:
                if verbose:
                    print('Problem with box y1 and y2: {}. Skip it'.format(row))
                continue
            if scr <= 0:
                if verbose:
                    print('Problem with box score: {}. Skip it'.format(row))
                continue

            boxes.append([box_x1, box_y1, box_x2, box_y2])
            scores.append(scr)
            if lbl not in labels_to_use_forward:
                cur_point = len(labels_to_use_forward)
                labels_to_use_forward[lbl] = cur_point
                labels_to_use_backward[cur_point] = lbl
            labels.append(labels_to_use_forward[lbl])

        boxes_list.append(np.array(boxes, dtype=np.float32))
        scores_list.append(np.array(scores, dtype=np.float32))
        labels_list.append(np.array(labels, dtype=np.int32))

    # Empty predictions for all models. Four values are returned because the
    # caller always unpacks (boxes, scores, labels, ids_list).
    if len(boxes_list) == 0:
        return np.zeros((0, 4)), np.array([]), np.array([], dtype=str), []

    if run_type == 'wbf':
        merged_boxes, merged_scores, merged_labels = weighted_boxes_fusion(
            boxes_list, scores_list, labels_list,
            weights=weights, iou_thr=params['intersection_thr'],
            skip_box_thr=params['skip_box_thr'],
            conf_type=params['conf_type'])
    elif run_type == 'nms':
        iou_thr = params['iou_thr']
        merged_boxes, merged_scores, merged_labels = nms(
            boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr)
    elif run_type == 'soft-nms':
        iou_thr = params['iou_thr']
        sigma = params['sigma']
        thresh = params['thresh']
        merged_boxes, merged_scores, merged_labels = soft_nms(
            boxes_list, scores_list, labels_list,
            weights=weights, iou_thr=iou_thr, sigma=sigma, thresh=thresh)
    elif run_type == 'nmw':
        merged_boxes, merged_scores, merged_labels = non_maximum_weighted(
            boxes_list, scores_list, labels_list,
            weights=weights, iou_thr=params['intersection_thr'],
            skip_box_thr=params['skip_box_thr'])
    else:
        raise ValueError('Unknown run_type: {}'.format(run_type))

    if 'limit_boxes' in params:
        limit_boxes = params['limit_boxes']
        if len(merged_boxes) > limit_boxes:
            merged_boxes = merged_boxes[:limit_boxes]
            merged_scores = merged_scores[:limit_boxes]
            merged_labels = merged_labels[:limit_boxes]

    # Rename labels back
    merged_labels = np.array(
        [labels_to_use_backward[int(m)] for m in merged_labels], dtype=str)

    # Create IDs array
    ids_list = [id] * len(merged_labels)

    return merged_boxes.copy(), merged_scores.copy(), merged_labels.copy(), ids_list

def process_part_of_data(proc_number, return_dict, ids_to_use, res_boxes, weights, params):
    """Merge predictions for a subset of images and store the result.

    :param proc_number: key under which the result is stored in ``return_dict``.
    :param return_dict: dict-like object that receives the result list.
    :param ids_to_use: image ids to process.
    :param res_boxes: predictions of all models, passed to ``process_single_id``.
    :param weights: per-model weights, passed to ``process_single_id``.
    :param params: settings dict, passed to ``process_single_id``.
    :return: None; ``return_dict[proc_number]`` is set to a list with one
        ``(boxes, scores, labels, ids_list)`` tuple per id, in input order.
    """
    print('Start process: {} IDs to proc: {}'.format(proc_number, len(ids_to_use)))
    return_dict[proc_number] = [
        tuple(process_single_id(image_id, res_boxes, weights, params))
        for image_id in ids_to_use
    ]

def ensemble_predictions(pred_filenames, weights, params):
    """Ensemble the predictions stored in several CSV files.

    Files whose weight is 0 are skipped. The images are split into chunks that
    are merged in separate processes by ``process_part_of_data``.

    :param pred_filenames: paths of CSV files with the columns ``img_id``,
        ``label``, ``score``, ``x1``, ``y1``, ``x2`` and ``y2``.
    :param weights: one weight per file in ``pred_filenames``.
    :param params: settings dict forwarded to ``process_single_id``; the
        optional key ``verbose`` enables extra output.
    :return: ``pandas.DataFrame`` with the columns ``img_id``, ``label``,
        ``score``, ``x1``, ``x2``, ``y1`` and ``y2``. It is empty when there
        is nothing to ensemble.
    :raises RuntimeError: if a worker process did not deliver its result.
    """
    columns = ['img_id', 'label', 'score', 'x1', 'x2', 'y1', 'y2']
    verbose = params.get('verbose', False)

    start_time = time.time()
    procs_to_use = max(cpu_count() // 2, 1)
    print('Use processes: {}'.format(procs_to_use))
    weights = np.array(weights)

    res_boxes = dict()
    ref_ids = None
    for j, filename in enumerate(pred_filenames):
        if weights[j] == 0:
            continue
        print('Read {}...'.format(filename))
        s = pd.read_csv(filename, dtype={'img_id': str, 'label': str})
        s = s.sort_values('img_id').reset_index(drop=True)
        unique_ids = tuple(sorted(s['img_id'].unique()))
        if ref_ids is None:
            # The first file that is read defines the reference set of ids.
            ref_ids = unique_ids
        if ref_ids != unique_ids:
            print('Different IDs in ensembled CSVs! {} != {}'.format(len(ref_ids), len(unique_ids)))
            s = s[s['img_id'].isin(ref_ids)]
            s = s.sort_values('img_id').reset_index(drop=True)
        ids = s['img_id'].values
        preds = s[['x1', 'y1', 'x2', 'y2', 'score', 'label']].values

        # Group the rows of this file by image ...
        single_res = dict()
        for image_id, pred in zip(ids, preds):
            single_res.setdefault(image_id, []).append(pred)
        # ... and add them as one more per-model entry of every image.
        # NOTE(review): an image without rows in this file gets no entry, so
        # it ends up with fewer entries than weights - confirm how the
        # ensemble_boxes functions treat that mismatch.
        for image_id, image_preds in single_res.items():
            res_boxes.setdefault(image_id, []).append(image_preds)

    # Reduce weights if needed
    weights = weights[weights != 0]

    if not res_boxes:
        # Nothing was read: skip the workers, np.concatenate would fail on empty lists.
        print('Run time: {:.2f}'.format(time.time() - start_time))
        return pd.DataFrame(columns=columns)

    ids_to_use = sorted(res_boxes.keys())
    manager = Manager()
    return_dict = manager.dict()
    jobs = []
    for i in range(procs_to_use):
        start = i * len(ids_to_use) // procs_to_use
        end = (i + 1) * len(ids_to_use) // procs_to_use
        if i == procs_to_use - 1:
            end = len(ids_to_use)
        p = Process(target=process_part_of_data,
                    args=(i, return_dict, ids_to_use[start:end], res_boxes, weights, params))
        jobs.append(p)
        p.start()

    for p in jobs:
        p.join()

    results = []
    for i in range(len(jobs)):
        if i not in return_dict:
            raise RuntimeError('Process {} did not return a result'.format(i))
        results += return_dict[i]

    all_ids = []
    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, labels, ids_list in results:
        if boxes is None:
            continue
        all_ids.append(ids_list)
        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    if not all_boxes:
        print('Run time: {:.2f}'.format(time.time() - start_time))
        return pd.DataFrame(columns=columns)

    all_ids = np.concatenate(all_ids)
    all_boxes = np.concatenate(all_boxes)
    all_scores = np.concatenate(all_scores)
    all_labels = np.concatenate(all_labels)
    if verbose:
        print(all_ids.shape, all_boxes.shape, all_scores.shape, all_labels.shape)

    res = pd.DataFrame(all_ids, columns=['img_id'])
    res['label'] = all_labels
    res['score'] = all_scores
    res['x1'] = all_boxes[:, 0]
    res['x2'] = all_boxes[:, 2]
    res['y1'] = all_boxes[:, 1]
    res['y2'] = all_boxes[:, 3]
    print('Run time: {:.2f}'.format(time.time() - start_time))
    return res

def ensemble(benchmark_csv, weights, params, get_score_init=True):
    """Ensemble the prediction files, save the result and evaluate it.

    The merged predictions are written to ``ensemble.csv`` in the current
    working directory and scored with ``get_coco_score``.

    :param benchmark_csv: paths of the CSV files with model predictions.
    :param weights: one weight per file in ``benchmark_csv``.
    :param params: settings dict forwarded to ``ensemble_predictions``.
    :param get_score_init: if true, every input file is scored on its own
        before the ensemble is built.
    :return: None.
    """
    if get_score_init:
        for bcsv in benchmark_csv:
            print('Go for {}'.format(bcsv))
            get_coco_score(bcsv)

    ensemble_preds = ensemble_predictions(benchmark_csv, weights, params)
    ensemble_preds.to_csv("ensemble.csv", index=False)
    get_coco_score("ensemble.csv")

if __name__ == '__main__':
    # Parameter presets, one per supported merging method.
    presets = {
        'nms': {
            'run_type': 'nms',
            'iou_thr': 0.5,
            'verbose': True,
        },
        'soft-nms': {
            'run_type': 'soft-nms',
            'iou_thr': 0.5,
            'thresh': 0.0001,
            'sigma': 0.1,
            'verbose': True,
        },
        'nmw': {
            'run_type': 'nmw',
            'skip_box_thr': 0.000000001,
            'intersection_thr': 0.5,
            'limit_boxes': 30000,
            'verbose': True,
        },
        'wbf': {
            'run_type': 'wbf',
            'skip_box_thr': 0.001,
            'intersection_thr': 0.7,
            'conf_type': 'avg',
            'limit_boxes': 30000,
            'verbose': False,
        },
    }
    # Change the key to benchmark another method.
    params = presets['wbf']

    in_dir = './'
    benchmark_csv = [
        in_dir + 'EffNetB0-preds.csv',
        in_dir + 'EffNetB0-mirror-preds.csv',
        in_dir + 'EffNetB1-preds.csv',
        in_dir + 'EffNetB1-mirror-preds.csv',
        in_dir + 'EffNetB2-preds.csv',
        in_dir + 'EffNetB2-mirror-preds.csv',
        in_dir + 'EffNetB3-preds.csv',
        in_dir + 'EffNetB3-mirror-preds.csv',
        in_dir + 'EffNetB4-preds.csv',
        in_dir + 'EffNetB4-mirror-preds.csv',
        in_dir + 'EffNetB5-preds.csv',
        in_dir + 'EffNetB5-mirror-preds.csv',
        in_dir + 'EffNetB6-preds.csv',
        in_dir + 'EffNetB6-mirror-preds.csv',
        in_dir + 'EffNetB7-preds.csv',
        in_dir + 'EffNetB7-mirror-preds.csv',
        in_dir + 'DetRS-valid.csv',
        in_dir + 'DetRS-mirror-valid.csv',
        in_dir + 'DetRS_resnet50-valid.csv',
        in_dir + 'DetRS_resnet50-mirror-valid.csv',
    ]
    # One weight per file above; a weight of 0 excludes the file from the ensemble.
    weights = [0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 5, 5, 7, 7, 9, 9, 8, 8, 5, 5]
    if len(benchmark_csv) != len(weights):
        raise ValueError('Number of weights ({}) differs from number of files ({})'.format(
            len(weights), len(benchmark_csv)))
    ensemble(benchmark_csv, weights, params, True)

0 comments on commit a0bbe6d

Please sign in to comment.