diff --git a/tools/submission/generate-final-report.py b/tools/submission/generate-final-report.py index 4346249ac..188b62e91 100644 --- a/tools/submission/generate-final-report.py +++ b/tools/submission/generate-final-report.py @@ -92,9 +92,8 @@ def main(): ] columns_order = [['Result'], [ - 'resnet', 'retinanet', '3d-unet-99', - '3d-unet-99.9', 'rnnt', 'bert-99', 'bert-99.9', - 'dlrm-99', 'dlrm-99.9' + 'resnet', 'retinanet', '3d-unet-99', '3d-unet-99.9', + 'rnnt', 'bert-99', 'bert-99.9', 'dlrm-99', 'dlrm-99.9' ], ['SingleStream', 'MultiStream', 'Server', 'Offline'], [ 'Latency (ms)', @@ -105,28 +104,28 @@ def main(): ]] filter_scenarios = { - 'datacenter': { - 'resnet': ['Server', 'Offline'], - 'retinanet': ['Server', 'Offline'], - 'rnnt': ['Server', 'Offline'], - 'bert-99': ['Server', 'Offline'], - 'bert-99.9': ['Server', 'Offline'], - 'dlrm-99': ['Server', 'Offline'], - 'dlrm-99.9': ['Server', 'Offline'], - '3d-unet-99': ['Offline'], - '3d-unet-99.9': ['Offline'], - }, - 'edge': { - 'resnet': ['SingleStream', 'MultiStream', 'Offline'], - 'retinanet': ['SingleStream', 'MultiStream', 'Offline'], - 'rnnt': ['SingleStream', 'Offline'], - 'bert-99': ['SingleStream', 'Offline'], - 'bert-99.9': [], - 'dlrm-99': [], - 'dlrm-99.9': [], - '3d-unet-99': ['SingleStream', 'Offline'], - '3d-unet-99.9': ['SingleStream', 'Offline'], - } + 'datacenter': { + 'resnet': ['Server', 'Offline'], + 'retinanet': ['Server', 'Offline'], + 'rnnt': ['Server', 'Offline'], + 'bert-99': ['Server', 'Offline'], + 'bert-99.9': ['Server', 'Offline'], + 'dlrm-99': ['Server', 'Offline'], + 'dlrm-99.9': ['Server', 'Offline'], + '3d-unet-99': ['Offline'], + '3d-unet-99.9': ['Offline'], + }, + 'edge': { + 'resnet': ['SingleStream', 'MultiStream', 'Offline'], + 'retinanet': ['SingleStream', 'MultiStream', 'Offline'], + 'rnnt': ['SingleStream', 'Offline'], + 'bert-99': ['SingleStream', 'Offline'], + 'bert-99.9': [], + 'dlrm-99': [], + 'dlrm-99.9': [], + '3d-unet-99': ['SingleStream', 'Offline'], + '3d-unet-99.9': ['SingleStream', 'Offline'], + } } def MakeWorksheet(df, index, filter_dict, sheet_name): @@ -158,7 +157,8 @@ def Apply(f, *args): return lambda x: f(x, *args) def FilterScenario(x, suite): - return x.apply(lambda y: y['Scenario'] in filter_scenarios[suite][y['Model']], axis = 1) + return x.apply( + lambda y: y['Scenario'] in filter_scenarios[suite][y['Model']], axis=1) def MakeUniqueID(x): key_list = ['Suite', 'Category', 'Submitter', 'Platform'] diff --git a/tools/submission/submission-checker.py b/tools/submission/submission-checker.py index 481eadec8..0d9eb0fae 100755 --- a/tools/submission/submission-checker.py +++ b/tools/submission/submission-checker.py @@ -1,5 +1,4 @@ -""" -A checker for mlperf inference submissions +"""A checker for mlperf inference submissions """ from __future__ import division @@ -18,7 +17,6 @@ # pylint: disable=missing-docstring - logging.basicConfig(level=logging.INFO) log = logging.getLogger("main") @@ -65,23 +63,30 @@ "schedule_rng_seed": 3622009729038561421, }, "test05_seeds": { - "qsl_rng_seed" : 195, - "sample_index_rng_seed" : 235, - "schedule_rng_seed" : 634, + "qsl_rng_seed": 195, + "sample_index_rng_seed": 235, + "schedule_rng_seed": 634, }, "ignore_errors": [ "check for ERROR in detailed", "Loadgen built with uncommitted changes", - "Ran out of generated queries to issue before the minimum query count and test duration were reached", + "Ran out of generated queries to issue before the minimum query " + "count and test duration were reached", "CAS failed", ], }, "v0.7": { 
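# --- Editorial sketch (not part of the patch) -------------------------------
# Illustrates the row-wise filter that FilterScenario() in
# generate-final-report.py (earlier in this patch) applies: a row survives only
# if its Scenario is listed for its Model in filter_scenarios.  The DataFrame
# contents below are made up; only the column names and the lambda mirror the
# report code.
import pandas as pd

filter_scenarios = {
    "edge": {
        "resnet": ["SingleStream", "MultiStream", "Offline"],
        "bert-99.9": [],
    }
}

df = pd.DataFrame([
    {"Model": "resnet", "Scenario": "Offline", "Result": 1234.0},
    {"Model": "resnet", "Scenario": "Server", "Result": 999.0},
    {"Model": "bert-99.9", "Scenario": "Offline", "Result": 55.0},
])

mask = df.apply(
    lambda y: y["Scenario"] in filter_scenarios["edge"][y["Model"]], axis=1)
print(df[mask])  # only the resnet/Offline row remains
# -----------------------------------------------------------------------------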
"models": [ - "ssd-small", "ssd-large", "resnet", "rnnt", - "bert-99", "bert-99.9", - "dlrm-99", "dlrm-99.9", - "3d-unet-99", "3d-unet-99.9", + "ssd-small", + "ssd-large", + "resnet", + "rnnt", + "bert-99", + "bert-99.9", + "dlrm-99", + "dlrm-99.9", + "3d-unet-99", + "3d-unet-99.9", ], "required-scenarios-datacenter": { "resnet": ["Offline"], @@ -154,42 +159,102 @@ "schedule_rng_seed": 3135815929913719677, }, "test05_seeds": { - "qsl_rng_seed" : 313588358309856706, - "sample_index_rng_seed" : 471397156132239067, - "schedule_rng_seed" : 413914573387865862, + "qsl_rng_seed": 313588358309856706, + "sample_index_rng_seed": 471397156132239067, + "schedule_rng_seed": 413914573387865862, }, - "ignore_errors": [ - "CAS failed", - ], + "ignore_errors": ["CAS failed",], "latency-constraint": { - "resnet": {"Server": 15000000, "MultiStream": 50000000}, - "ssd-small": {"MultiStream": 50000000}, - "ssd-large": {"Server": 100000000, "MultiStream": 66000000}, - "rnnt": {"Server": 1000000000}, - "bert-99": {"Server": 130000000}, - "bert-99.9": {"Server": 130000000}, - "dlrm-99": {"Server": 30000000}, - "dlrm-99.9": {"Server": 30000000}, + "resnet": { + "Server": 15000000, + "MultiStream": 50000000 + }, + "ssd-small": { + "MultiStream": 50000000 + }, + "ssd-large": { + "Server": 100000000, + "MultiStream": 66000000 + }, + "rnnt": { + "Server": 1000000000 + }, + "bert-99": { + "Server": 130000000 + }, + "bert-99.9": { + "Server": 130000000 + }, + "dlrm-99": { + "Server": 30000000 + }, + "dlrm-99.9": { + "Server": 30000000 + }, }, "min-queries": { - "resnet": {"SingleStream": 1024, "Server": 270336, "MultiStream": 270336, "Offline": 1}, - "ssd-small": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, - "ssd-large": {"SingleStream": 1024, "Server": 270336, "MultiStream": 270336, "Offline": 1}, - "rnnt": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "dlrm-99": {"Server": 270336, "Offline": 1}, - "dlrm-99.9": {"Server": 270336, "Offline": 1}, - "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, - "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1}, + "resnet": { + "SingleStream": 1024, + "Server": 270336, + "MultiStream": 270336, + "Offline": 1 + }, + "ssd-small": { + "SingleStream": 1024, + "MultiStream": 270336, + "Offline": 1 + }, + "ssd-large": { + "SingleStream": 1024, + "Server": 270336, + "MultiStream": 270336, + "Offline": 1 + }, + "rnnt": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99.9": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "dlrm-99": { + "Server": 270336, + "Offline": 1 + }, + "dlrm-99.9": { + "Server": 270336, + "Offline": 1 + }, + "3d-unet-99": { + "SingleStream": 1024, + "Offline": 1 + }, + "3d-unet-99.9": { + "SingleStream": 1024, + "Offline": 1 + }, }, }, "v1.0": { "models": [ - "ssd-small", "ssd-large", "resnet", "rnnt", - "bert-99", "bert-99.9", - "dlrm-99", "dlrm-99.9", - "3d-unet-99", "3d-unet-99.9", + "ssd-small", + "ssd-large", + "resnet", + "rnnt", + "bert-99", + "bert-99.9", + "dlrm-99", + "dlrm-99.9", + "3d-unet-99", + "3d-unet-99.9", ], "required-scenarios-datacenter": { "resnet": ["Offline"], @@ -284,41 +349,102 @@ "schedule_rng_seed": 3507442325620259414, }, "test05_seeds": { - "qsl_rng_seed" : 313588358309856706, - "sample_index_rng_seed" : 471397156132239067, - "schedule_rng_seed" : 
413914573387865862, + "qsl_rng_seed": 313588358309856706, + "sample_index_rng_seed": 471397156132239067, + "schedule_rng_seed": 413914573387865862, }, - "ignore_errors": [ - ], + "ignore_errors": [], "latency-constraint": { - "resnet": {"Server": 15000000, "MultiStream": 50000000}, - "ssd-small": {"MultiStream": 50000000}, - "ssd-large": {"Server": 100000000, "MultiStream": 66000000}, - "rnnt": {"Server": 1000000000}, - "bert-99": {"Server": 130000000}, - "bert-99.9": {"Server": 130000000}, - "dlrm-99": {"Server": 30000000}, - "dlrm-99.9": {"Server": 30000000}, + "resnet": { + "Server": 15000000, + "MultiStream": 50000000 + }, + "ssd-small": { + "MultiStream": 50000000 + }, + "ssd-large": { + "Server": 100000000, + "MultiStream": 66000000 + }, + "rnnt": { + "Server": 1000000000 + }, + "bert-99": { + "Server": 130000000 + }, + "bert-99.9": { + "Server": 130000000 + }, + "dlrm-99": { + "Server": 30000000 + }, + "dlrm-99.9": { + "Server": 30000000 + }, }, "min-queries": { - "resnet": {"SingleStream": 1024, "Server": 270336, "MultiStream": 270336, "Offline": 1}, - "ssd-small": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, - "ssd-large": {"SingleStream": 1024, "Server": 270336, "MultiStream": 270336, "Offline": 1}, - "rnnt": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "dlrm-99": {"Server": 270336, "Offline": 1}, - "dlrm-99.9": {"Server": 270336, "Offline": 1}, - "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, - "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1}, + "resnet": { + "SingleStream": 1024, + "Server": 270336, + "MultiStream": 270336, + "Offline": 1 + }, + "ssd-small": { + "SingleStream": 1024, + "MultiStream": 270336, + "Offline": 1 + }, + "ssd-large": { + "SingleStream": 1024, + "Server": 270336, + "MultiStream": 270336, + "Offline": 1 + }, + "rnnt": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99.9": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "dlrm-99": { + "Server": 270336, + "Offline": 1 + }, + "dlrm-99.9": { + "Server": 270336, + "Offline": 1 + }, + "3d-unet-99": { + "SingleStream": 1024, + "Offline": 1 + }, + "3d-unet-99.9": { + "SingleStream": 1024, + "Offline": 1 + }, }, }, "v1.1": { "models": [ - "ssd-small", "ssd-large", "resnet", "rnnt", - "bert-99", "bert-99.9", - "dlrm-99", "dlrm-99.9", - "3d-unet-99", "3d-unet-99.9", + "ssd-small", + "ssd-large", + "resnet", + "rnnt", + "bert-99", + "bert-99.9", + "dlrm-99", + "dlrm-99.9", + "3d-unet-99", + "3d-unet-99.9", ], "required-scenarios-datacenter": { "resnet": ["Offline"], @@ -407,41 +533,99 @@ "schedule_rng_seed": 10051496985653635065, }, "test05_seeds": { - "qsl_rng_seed" : 313588358309856706, - "sample_index_rng_seed" : 471397156132239067, - "schedule_rng_seed" : 413914573387865862, + "qsl_rng_seed": 313588358309856706, + "sample_index_rng_seed": 471397156132239067, + "schedule_rng_seed": 413914573387865862, }, - "ignore_errors": [ - ], + "ignore_errors": [], "latency-constraint": { - "resnet": {"Server": 15000000, "MultiStream": 50000000}, - "ssd-small": {"MultiStream": 50000000}, - "ssd-large": {"Server": 100000000, "MultiStream": 66000000}, - "rnnt": {"Server": 1000000000}, - "bert-99": {"Server": 130000000}, - "bert-99.9": {"Server": 130000000}, - "dlrm-99": {"Server": 30000000}, - "dlrm-99.9": {"Server": 30000000}, + 
"resnet": { + "Server": 15000000, + "MultiStream": 50000000 + }, + "ssd-small": { + "MultiStream": 50000000 + }, + "ssd-large": { + "Server": 100000000, + "MultiStream": 66000000 + }, + "rnnt": { + "Server": 1000000000 + }, + "bert-99": { + "Server": 130000000 + }, + "bert-99.9": { + "Server": 130000000 + }, + "dlrm-99": { + "Server": 30000000 + }, + "dlrm-99.9": { + "Server": 30000000 + }, }, "min-queries": { - "resnet": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "ssd-small": {"SingleStream": 1024, "Offline": 1}, - "ssd-large": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "rnnt": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "dlrm-99": {"Server": 270336, "Offline": 1}, - "dlrm-99.9": {"Server": 270336, "Offline": 1}, - "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, - "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1}, + "resnet": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "ssd-small": { + "SingleStream": 1024, + "Offline": 1 + }, + "ssd-large": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "rnnt": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99.9": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "dlrm-99": { + "Server": 270336, + "Offline": 1 + }, + "dlrm-99.9": { + "Server": 270336, + "Offline": 1 + }, + "3d-unet-99": { + "SingleStream": 1024, + "Offline": 1 + }, + "3d-unet-99.9": { + "SingleStream": 1024, + "Offline": 1 + }, }, }, "v2.0": { "models": [ - "ssd-small", "ssd-large", "resnet", "rnnt", - "bert-99", "bert-99.9", - "dlrm-99", "dlrm-99.9", - "3d-unet-99", "3d-unet-99.9", + "ssd-small", + "ssd-large", + "resnet", + "rnnt", + "bert-99", + "bert-99.9", + "dlrm-99", + "dlrm-99.9", + "3d-unet-99", + "3d-unet-99.9", ], # FIXME: required/optional scenarios for v2.0 needs to be filled up correctly; below lists are temporary "required-scenarios-datacenter": { @@ -455,8 +639,7 @@ "3d-unet-99": ["Offline"], "3d-unet-99.9": ["Offline"], }, - "optional-scenarios-datacenter": { - }, + "optional-scenarios-datacenter": {}, "required-scenarios-edge": { "resnet": ["SingleStream", "MultiStream", "Offline"], "ssd-small": ["SingleStream", "MultiStream", "Offline"], @@ -466,8 +649,7 @@ "3d-unet-99": ["SingleStream", "Offline"], "3d-unet-99.9": ["SingleStream", "Offline"], }, - "optional-scenarios-edge": { - }, + "optional-scenarios-edge": {}, "required-scenarios-datacenter-edge": { "resnet": ["SingleStream", "Offline", "MultiStream", "Server"], "ssd-small": ["SingleStream", "Offline", "MultiStream"], @@ -480,8 +662,7 @@ "3d-unet-99": ["SingleStream", "Offline"], "3d-unet-99.9": ["SingleStream", "Offline"], }, - "optional-scenarios-datacenter-edge": { - }, + "optional-scenarios-datacenter-edge": {}, "accuracy-target": { "resnet": ("acc", 76.46 * 0.99), "ssd-small": ("mAP", 22 * 0.99), @@ -536,40 +717,96 @@ "schedule_rng_seed": 12662793979680847247, }, "test05_seeds": { - "qsl_rng_seed" : 313588358309856706, - "sample_index_rng_seed" : 471397156132239067, - "schedule_rng_seed" : 413914573387865862, + "qsl_rng_seed": 313588358309856706, + "sample_index_rng_seed": 471397156132239067, + "schedule_rng_seed": 413914573387865862, }, - "ignore_errors": [ - ], + "ignore_errors": [], "latency-constraint": { - "resnet": {"Server": 15000000}, - "ssd-large": 
{"Server": 100000000}, - "rnnt": {"Server": 1000000000}, - "bert-99": {"Server": 130000000}, - "bert-99.9": {"Server": 130000000}, - "dlrm-99": {"Server": 30000000}, - "dlrm-99.9": {"Server": 30000000}, + "resnet": { + "Server": 15000000 + }, + "ssd-large": { + "Server": 100000000 + }, + "rnnt": { + "Server": 1000000000 + }, + "bert-99": { + "Server": 130000000 + }, + "bert-99.9": { + "Server": 130000000 + }, + "dlrm-99": { + "Server": 30000000 + }, + "dlrm-99.9": { + "Server": 30000000 + }, }, "min-queries": { - "resnet": {"SingleStream": 1024, "MultiStream": 270336, "Server": 270336, "Offline": 1}, - "ssd-small": {"SingleStream": 1024, "MultiStream": 270336, "Offline": 1}, - "ssd-large": {"SingleStream": 1024, "MultiStream": 270336, "Server": 270336, "Offline": 1}, - "rnnt": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "dlrm-99": {"Server": 270336, "Offline": 1}, - "dlrm-99.9": {"Server": 270336, "Offline": 1}, - "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, - "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1}, + "resnet": { + "SingleStream": 1024, + "MultiStream": 270336, + "Server": 270336, + "Offline": 1 + }, + "ssd-small": { + "SingleStream": 1024, + "MultiStream": 270336, + "Offline": 1 + }, + "ssd-large": { + "SingleStream": 1024, + "MultiStream": 270336, + "Server": 270336, + "Offline": 1 + }, + "rnnt": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99.9": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "dlrm-99": { + "Server": 270336, + "Offline": 1 + }, + "dlrm-99.9": { + "Server": 270336, + "Offline": 1 + }, + "3d-unet-99": { + "SingleStream": 1024, + "Offline": 1 + }, + "3d-unet-99.9": { + "SingleStream": 1024, + "Offline": 1 + }, }, }, "v2.1": { "models": [ - "resnet", "retinanet", "rnnt", - "bert-99", "bert-99.9", - "dlrm-99", "dlrm-99.9", - "3d-unet-99", "3d-unet-99.9", + "resnet", + "retinanet", + "rnnt", + "bert-99", + "bert-99.9", + "dlrm-99", + "dlrm-99.9", + "3d-unet-99", + "3d-unet-99.9", ], "required-scenarios-datacenter": { "resnet": ["Server", "Offline"], @@ -582,8 +819,7 @@ "3d-unet-99": ["Offline"], "3d-unet-99.9": ["Offline"], }, - "optional-scenarios-datacenter": { - }, + "optional-scenarios-datacenter": {}, "required-scenarios-edge": { "resnet": ["SingleStream", "MultiStream", "Offline"], "retinanet": ["SingleStream", "MultiStream", "Offline"], @@ -592,8 +828,7 @@ "3d-unet-99": ["SingleStream", "Offline"], "3d-unet-99.9": ["SingleStream", "Offline"], }, - "optional-scenarios-edge": { - }, + "optional-scenarios-edge": {}, "required-scenarios-datacenter-edge": { "resnet": ["SingleStream", "Offline", "MultiStream", "Server"], "retinanet": ["SingleStream", "Offline", "MultiStream", "Server"], @@ -605,8 +840,7 @@ "3d-unet-99": ["SingleStream", "Offline"], "3d-unet-99.9": ["SingleStream", "Offline"], }, - "optional-scenarios-datacenter-edge": { - }, + "optional-scenarios-datacenter-edge": {}, "accuracy-target": { "resnet": ("acc", 76.46 * 0.99), "retinanet": ("mAP", 37.55 * 0.99), @@ -661,31 +895,78 @@ "schedule_rng_seed": 299063814864929621, }, "test05_seeds": { - "qsl_rng_seed" : 313588358309856706, - "sample_index_rng_seed" : 471397156132239067, - "schedule_rng_seed" : 413914573387865862, + "qsl_rng_seed": 313588358309856706, + "sample_index_rng_seed": 471397156132239067, + 
"schedule_rng_seed": 413914573387865862, }, - "ignore_errors": [ - ], + "ignore_errors": [], "latency-constraint": { - "resnet": {"Server": 15000000}, - "retinanet": {"Server": 100000000}, - "rnnt": {"Server": 1000000000}, - "bert-99": {"Server": 130000000}, - "bert-99.9": {"Server": 130000000}, - "dlrm-99": {"Server": 30000000}, - "dlrm-99.9": {"Server": 30000000}, + "resnet": { + "Server": 15000000 + }, + "retinanet": { + "Server": 100000000 + }, + "rnnt": { + "Server": 1000000000 + }, + "bert-99": { + "Server": 130000000 + }, + "bert-99.9": { + "Server": 130000000 + }, + "dlrm-99": { + "Server": 30000000 + }, + "dlrm-99.9": { + "Server": 30000000 + }, }, "min-queries": { - "resnet": {"SingleStream": 1024, "MultiStream": 270336, "Server": 270336, "Offline": 1}, - "retinanet": {"SingleStream": 1024, "MultiStream": 270336, "Server": 270336, "Offline": 1}, - "rnnt": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "dlrm-99": {"Server": 270336, "Offline": 1}, - "dlrm-99.9": {"Server": 270336, "Offline": 1}, - "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, - "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1}, + "resnet": { + "SingleStream": 1024, + "MultiStream": 270336, + "Server": 270336, + "Offline": 1 + }, + "retinanet": { + "SingleStream": 1024, + "MultiStream": 270336, + "Server": 270336, + "Offline": 1 + }, + "rnnt": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "bert-99.9": { + "SingleStream": 1024, + "Server": 270336, + "Offline": 1 + }, + "dlrm-99": { + "Server": 270336, + "Offline": 1 + }, + "dlrm-99.9": { + "Server": 270336, + "Offline": 1 + }, + "3d-unet-99": { + "SingleStream": 1024, + "Offline": 1 + }, + "3d-unet-99.9": { + "SingleStream": 1024, + "Offline": 1 + }, }, }, } @@ -695,8 +976,13 @@ REQUIRED_PERF_FILES = ["mlperf_log_summary.txt", "mlperf_log_detail.txt"] OPTIONAL_PERF_FILES = ["mlperf_log_accuracy.json"] REQUIRED_PERF_POWER_FILES = ["spl.txt"] -REQUIRED_POWER_FILES = ["client.json", "client.log", "ptd_logs.txt", "server.json", "server.log"] -REQUIRED_ACC_FILES = ["mlperf_log_summary.txt", "mlperf_log_detail.txt", "accuracy.txt", "mlperf_log_accuracy.json"] +REQUIRED_POWER_FILES = [ + "client.json", "client.log", "ptd_logs.txt", "server.json", "server.log" +] +REQUIRED_ACC_FILES = [ + "mlperf_log_summary.txt", "mlperf_log_detail.txt", "accuracy.txt", + "mlperf_log_accuracy.json" +] REQUIRED_MEASURE_FILES = ["mlperf.conf", "user.conf", "README.md"] MS_TO_NS = 1000 * 1000 S_TO_MS = 1000 @@ -706,7 +992,9 @@ TEST_DURATION_MS = 600000 REQUIRED_COMP_PER_FILES = ["mlperf_log_summary.txt", "mlperf_log_detail.txt"] REQUIRED_TEST01_ACC_FILES_1 = ["mlperf_log_accuracy.json", "accuracy.txt"] -REQUIRED_TEST01_ACC_FILES = REQUIRED_TEST01_ACC_FILES_1 + ["baseline_accuracy.txt", "compliance_accuracy.txt"] +REQUIRED_TEST01_ACC_FILES = REQUIRED_TEST01_ACC_FILES_1 + [ + "baseline_accuracy.txt", "compliance_accuracy.txt" +] SCENARIO_MAPPING = { "singlestream": "SingleStream", @@ -785,44 +1073,59 @@ } SYSTEM_DESC_REQUIRED_FIELDS = [ - "division", "submitter", "status", "system_name", "number_of_nodes", "host_processor_model_name", - "host_processors_per_node", "host_processor_core_count", "host_memory_capacity", "host_storage_capacity", - "host_storage_type", "accelerators_per_node", "accelerator_model_name", 
"accelerator_memory_capacity", - "framework", "operating_system" + "division", "submitter", "status", "system_name", "number_of_nodes", + "host_processor_model_name", "host_processors_per_node", + "host_processor_core_count", "host_memory_capacity", + "host_storage_capacity", "host_storage_type", "accelerators_per_node", + "accelerator_model_name", "accelerator_memory_capacity", "framework", + "operating_system" ] SYSTEM_DESC_REQUIED_FIELDS_SINCE_V1 = [ - "system_type", "other_software_stack", "host_processor_frequency", "host_processor_caches", - "host_memory_configuration", "host_processor_interconnect", "host_networking", "host_networking_topology", - "accelerator_frequency", "accelerator_host_interconnect", "accelerator_interconnect", + "system_type", "other_software_stack", "host_processor_frequency", + "host_processor_caches", "host_memory_configuration", + "host_processor_interconnect", "host_networking", + "host_networking_topology", "accelerator_frequency", + "accelerator_host_interconnect", "accelerator_interconnect", "accelerator_interconnect_topology", "accelerator_memory_configuration", "accelerator_on-chip_memories", "cooling", "hw_notes", "sw_notes" ] SYSTEM_DESC_REQUIED_FIELDS_POWER = [ - "power_management", "filesystem", "boot_firmware_version", "management_firmware_version", "other_hardware", - "number_of_type_nics_installed", "nics_enabled_firmware", "nics_enabled_os", "nics_enabled_connected", - "network_speed_mbit", "power_supply_quantity_and_rating_watts", "power_supply_details", "disk_drives", - "disk_controllers" + "power_management", "filesystem", "boot_firmware_version", + "management_firmware_version", "other_hardware", + "number_of_type_nics_installed", "nics_enabled_firmware", "nics_enabled_os", + "nics_enabled_connected", "network_speed_mbit", + "power_supply_quantity_and_rating_watts", "power_supply_details", + "disk_drives", "disk_controllers" ] SYSTEM_DESC_IS_NETWORK_MODE = "is_network" SYSTEM_DESC_REQUIRED_FIELDS_NETWORK_MODE = [ - SYSTEM_DESC_IS_NETWORK_MODE, "network_type", "network_media", "network_rate", "nic_loadgen", - "number_nic_loadgen", "net_software_stack_loadgen", "network_protocol", "number_connections", "nic_sut", - "number_nic_sut", "net_software_stack_sut", "network_topology" + SYSTEM_DESC_IS_NETWORK_MODE, "network_type", "network_media", + "network_rate", "nic_loadgen", "number_nic_loadgen", + "net_software_stack_loadgen", "network_protocol", "number_connections", + "nic_sut", "number_nic_sut", "net_software_stack_sut", "network_topology" ] NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME = "Network SUT" SYSTEM_IMP_REQUIRED_FILES = [ - "input_data_types", "retraining", "starting_weights_filename", "weight_data_types", + "input_data_types", + "retraining", + "starting_weights_filename", + "weight_data_types", "weight_transformations", ] class Config(): """Select config value by mlperf version and submission type.""" - def __init__(self, version, extra_model_benchmark_map, ignore_uncommited=False, more_power_check=False): + + def __init__(self, + version, + extra_model_benchmark_map, + ignore_uncommited=False, + more_power_check=False): self.base = MODEL_CONFIG.get(version) self.set_extra_model_benchmark_map(extra_model_benchmark_map) self.version = version @@ -840,9 +1143,9 @@ def __init__(self, version, extra_model_benchmark_map, ignore_uncommited=False, def set_extra_model_benchmark_map(self, extra_model_benchmark_map): if extra_model_benchmark_map: - for mapping in extra_model_benchmark_map.split(';'): - model_name, mlperf_model = 
mapping.split(':') - self.base['model_mapping'][model_name] = mlperf_model + for mapping in extra_model_benchmark_map.split(";"): + model_name, mlperf_model = mapping.split(":") + self.base["model_mapping"][model_name] = mlperf_model def set_type(self, submission_type): if submission_type is None and self.version in ["v0.5"]: @@ -918,7 +1221,8 @@ def ignore_errors(self, line): for error in self.base["ignore_errors"]: if error in line: return True - if self.ignore_uncommited and "ERROR : Loadgen built with uncommitted changes!" in line: + if self.ignore_uncommited and ("ERROR : Loadgen built with uncommitted " + "changes!") in line: return True return False @@ -934,33 +1238,47 @@ def has_new_logging_format(self): def uses_legacy_multistream(self): return self.version in ["v0.5", "v0.7", "v1.0", "v1.1"] - def uses_early_stopping(self, scenario): - return (self.version not in ["v0.5", "v0.7", "v1.0", "v1.1"]) and ( - scenario in ["Server", "SingleStream", "MultiStream"] - ) + return (self.version not in [ + "v0.5", "v0.7", "v1.0", "v1.1" + ]) and (scenario in ["Server", "SingleStream", "MultiStream"]) def has_query_count_in_log(self): return self.version not in ["v0.5", "v0.7", "v1.0", "v1.1"] - def has_power_utc_timestamps(self): return self.version not in ["v0.5", "v0.7", "v1.0"] - def get_args(): """Parse commandline.""" parser = argparse.ArgumentParser() parser.add_argument("--input", required=True, help="submission directory") - parser.add_argument("--version", default="v2.1", choices=list(MODEL_CONFIG.keys()), help="mlperf version") + parser.add_argument( + "--version", + default="v2.1", + choices=list(MODEL_CONFIG.keys()), + help="mlperf version") parser.add_argument("--submitter", help="filter to submitter") - parser.add_argument("--csv", default="summary.csv", help="csv file with results") - parser.add_argument("--skip_compliance", action="store_true", help="Pass this cmdline option to skip checking compliance/ dir") - parser.add_argument("--extra-model-benchmark-map", help="extra model name to benchmark mapping") + parser.add_argument( + "--csv", default="summary.csv", help="csv file with results") + parser.add_argument( + "--skip_compliance", + action="store_true", + help="Pass this cmdline option to skip checking compliance/ dir") + parser.add_argument( + "--extra-model-benchmark-map", + help="extra model name to benchmark mapping") parser.add_argument("--debug", action="store_true", help="extra debug output") - parser.add_argument("--submission-exceptions", action="store_true", help="ignore certain errors for submission") - parser.add_argument("--more-power-check", action="store_true", help="apply Power WG's check.py script on each power submission. Requires Python 3.7+") + parser.add_argument( + "--submission-exceptions", + action="store_true", + help="ignore certain errors for submission") + parser.add_argument( + "--more-power-check", + action="store_true", + help="apply Power WG's check.py script on each power submission. 
Requires Python 3.7+" + ) args = parser.parse_args() return args @@ -1034,7 +1352,8 @@ def check_accuracy_dir(config, model, path, verbose): if acc and float(acc) >= acc_target: is_valid = True elif verbose: - log.warning("%s accuracy not met: expected=%f, found=%s", path, acc_target, acc) + log.warning("%s accuracy not met: expected=%f, found=%s", path, acc_target, + acc) if not hash_val: log.error("%s not hash value for mlperf_log_accuracy.json", path) @@ -1058,7 +1377,8 @@ def check_accuracy_dir(config, model, path, verbose): return is_valid, acc -def check_performance_dir(config, model, path, scenario_fixed, division, system_json): +def check_performance_dir(config, model, path, scenario_fixed, division, + system_json): is_valid = False rt = {} @@ -1066,7 +1386,8 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ if config.has_new_logging_format(): fname = os.path.join(path, "mlperf_log_detail.txt") mlperf_log = MLPerfLog(fname) - if "result_validity" in mlperf_log.get_keys() and mlperf_log["result_validity"] == "VALID": + if "result_validity" in mlperf_log.get_keys( + ) and mlperf_log["result_validity"] == "VALID": is_valid = True performance_sample_count = mlperf_log["effective_performance_sample_count"] qsl_rng_seed = mlperf_log["effective_qsl_rng_seed"] @@ -1079,14 +1400,16 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ latency_99_percentile = mlperf_log["result_99.00_percentile_latency_ns"] latency_mean = mlperf_log["result_mean_latency_ns"] if scenario in ["MultiStream"]: - latency_99_percentile = mlperf_log["result_99.00_percentile_per_query_latency_ns"] + latency_99_percentile = mlperf_log[ + "result_99.00_percentile_per_query_latency_ns"] latency_mean = mlperf_log["result_mean_query_latency_ns"] min_query_count = mlperf_log["effective_min_query_count"] samples_per_query = mlperf_log["effective_samples_per_query"] min_duration = mlperf_log["effective_min_duration_ms"] if scenario == "SingleStream": # qps_wo_loadgen_overhead is only used for inferring Offline from SingleStream; only for old submissions - qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"] + qps_wo_loadgen_overhead = mlperf_log[ + "result_qps_without_loadgen_overhead"] sut_name = mlperf_log["sut_name"] else: fname = os.path.join(path, "mlperf_log_summary.txt") @@ -1098,20 +1421,20 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ m = re.match(r"^\s*([\w\s.\(\)\/]+)\s*\:\s*([\w\+\.][\w\+\.\s]*)", line) if m: rt[m.group(1).strip()] = m.group(2).strip() - performance_sample_count = int(rt['performance_sample_count']) + performance_sample_count = int(rt["performance_sample_count"]) qsl_rng_seed = int(rt["qsl_rng_seed"]) sample_index_rng_seed = int(rt["sample_index_rng_seed"]) schedule_rng_seed = int(rt["schedule_rng_seed"]) - scenario = rt["Scenario"].replace(" ","") + scenario = rt["Scenario"].replace(" ", "") res = float(rt[RESULT_FIELD[scenario]]) - latency_99_percentile = int(rt['99.00 percentile latency (ns)']) - latency_mean = int(rt['Mean latency (ns)']) - min_query_count = int(rt['min_query_count']) - samples_per_query = int(rt['samples_per_query']) + latency_99_percentile = int(rt["99.00 percentile latency (ns)"]) + latency_mean = int(rt["Mean latency (ns)"]) + min_query_count = int(rt["min_query_count"]) + samples_per_query = int(rt["samples_per_query"]) min_duration = int(rt["min_duration (ms)"]) if scenario == "SingleStream": qps_wo_loadgen_overhead = float(rt["QPS w/o loadgen 
overhead"]) - sut_name = str(rt['System Under Test (SUT) name: ']) + sut_name = str(rt["System Under Test (SUT) name: "]) # check if there are any errors in the detailed log fname = os.path.join(path, "mlperf_log_detail.txt") @@ -1120,19 +1443,23 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ required_performance_sample_count = config.get_performance_sample_count(model) if performance_sample_count < required_performance_sample_count: - log.error("%s performance_sample_count, found %d, needs to be >= %d", - fname, performance_sample_count, required_performance_sample_count) + log.error("%s performance_sample_count, found %d, needs to be >= %d", fname, + performance_sample_count, required_performance_sample_count) is_valid = False config_seeds = config.seeds if "TEST05" not in fname else config.test05_seeds if qsl_rng_seed != config_seeds["qsl_rng_seed"]: - log.error("%s qsl_rng_seed is wrong, expected=%s, found=%s", fname, config_seeds["qsl_rng_seed"], qsl_rng_seed) + log.error("%s qsl_rng_seed is wrong, expected=%s, found=%s", fname, + config_seeds["qsl_rng_seed"], qsl_rng_seed) if sample_index_rng_seed != config_seeds["sample_index_rng_seed"]: - log.error("%s sample_index_rng_seed is wrong, expected=%s, found=%s", fname, config_seeds["sample_index_rng_seed"], sample_index_rng_seed) + log.error("%s sample_index_rng_seed is wrong, expected=%s, found=%s", fname, + config_seeds["sample_index_rng_seed"], sample_index_rng_seed) if schedule_rng_seed != config_seeds["schedule_rng_seed"]: - log.error("%s schedule_rng_seed is wrong, expected=%s, found=%s", fname, config_seeds["schedule_rng_seed"], schedule_rng_seed) + log.error("%s schedule_rng_seed is wrong, expected=%s, found=%s", fname, + config_seeds["schedule_rng_seed"], schedule_rng_seed) - if scenario == "SingleStream" or (scenario == "MultiStream" and not config.uses_legacy_multistream()): + if scenario == "SingleStream" or (scenario == "MultiStream" and + not config.uses_legacy_multistream()): res /= MS_TO_NS # Check if current scenario (and version) uses early stopping @@ -1146,28 +1473,33 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ # check if early_stopping condition was met if not mlperf_log["early_stopping_met"]: early_stopping_result = mlperf_log["early_stopping_result"] - log.error("Early stopping condition was not met, msg=%s", early_stopping_result) + log.error("Early stopping condition was not met, msg=%s", + early_stopping_result) # If the scenario has a target latency (Server scenario), check # that the target latency that was passed to the early stopping # is less than the target latency. 
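# --- Editorial sketch (not part of the patch) -------------------------------
# Numeric illustration of the check described in the comment above: when early
# stopping is used, the effective target latency handed to early stopping must
# not exceed the per-model Server latency constraint (both in nanoseconds).
# The resnet constraint comes from MODEL_CONFIG; the log value is made up.
MS_TO_NS = 1000 * 1000
target_latency = 15000000              # resnet "Server" constraint from MODEL_CONFIG
early_stopping_latency_ns = 14500000   # hypothetical effective_target_latency_ns
if early_stopping_latency_ns > target_latency:
    print("latency constraint with early stopping not met")
else:
    print("ok: %.1f ms <= %.1f ms" % (early_stopping_latency_ns / MS_TO_NS,
                                      target_latency / MS_TO_NS))
# -----------------------------------------------------------------------------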
- target_latency = config.latency_constraint.get(model, dict()).get(scenario) + target_latency = config.latency_constraint.get(model, + dict()).get(scenario) if target_latency: early_stopping_latency_ns = mlperf_log["effective_target_latency_ns"] log.info("Target latency: %s, Early Stopping Latency: %s, Scenario: %s", - target_latency, early_stopping_latency_ns, scenario) + target_latency, early_stopping_latency_ns, scenario) if early_stopping_latency_ns > target_latency: - log.error("%s Latency constraint with early stopping not met, expected=%s, found=%s", - fname, target_latency, early_stopping_latency_ns) + log.error( + "%s Latency constraint with early stopping not met, expected=%s, found=%s", + fname, target_latency, early_stopping_latency_ns) else: # check if the benchmark meets latency constraint - target_latency = config.latency_constraint.get(model, dict()).get(scenario) - log.info("Target latency: %s, Latency: %s, Scenario: %s", target_latency, latency_99_percentile, scenario) + target_latency = config.latency_constraint.get(model, + dict()).get(scenario) + log.info("Target latency: %s, Latency: %s, Scenario: %s", target_latency, + latency_99_percentile, scenario) if target_latency: if latency_99_percentile > target_latency: log.error("%s Latency constraint not met, expected=%s, found=%s", - fname, target_latency, latency_99_percentile) + fname, target_latency, latency_99_percentile) # Check Minimum queries were issued to meet test duration # Check if this run uses early stopping. If it does, get the @@ -1176,18 +1508,23 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ if not uses_early_stopping: required_min_query_count = config.get_min_query_count(model, scenario) if required_min_query_count and min_query_count < required_min_query_count: - log.error("%s Required minimum Query Count not met by user config, Expected=%s, Found=%s", - fname, required_min_query_count, min_query_count) + log.error( + "%s Required minimum Query Count not met by user config, Expected=%s, Found=%s", + fname, required_min_query_count, min_query_count) if scenario == "Offline" and (samples_per_query < OFFLINE_MIN_SPQ): - log.error("%s Required minimum samples per query not met by user config, Expected=%s, Found=%s", - fname, OFFLINE_MIN_SPQ, samples_per_query) + log.error( + "%s Required minimum samples per query not met by user config, Expected=%s, Found=%s", + fname, OFFLINE_MIN_SPQ, samples_per_query) # Test duration of 600s is met - required_min_duration = TEST_DURATION_MS_PRE_1_0 if config.version in ["v0.5", "v0.7"] else TEST_DURATION_MS + required_min_duration = TEST_DURATION_MS_PRE_1_0 if config.version in [ + "v0.5", "v0.7" + ] else TEST_DURATION_MS if min_duration < required_min_duration: - log.error("%s Test duration lesser than 600s in user config. expected=%s, found=%s", - fname, required_min_duration, min_duration) + log.error( + "%s Test duration lesser than 600s in user config. 
expected=%s, found=%s", + fname, required_min_duration, min_duration) inferred = False # special case for results inferred from different scenario @@ -1195,11 +1532,13 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ inferred = True res = qps_wo_loadgen_overhead - if (scenario_fixed in ["Offline"] and not config.uses_legacy_multistream()) and scenario in ["MultiStream"]: + if (scenario_fixed in ["Offline"] and + not config.uses_legacy_multistream()) and scenario in ["MultiStream"]: inferred = True res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS) - if (scenario_fixed in ["MultiStream"] and not config.uses_legacy_multistream()) and scenario in ["SingleStream"]: + if (scenario_fixed in ["MultiStream"] and + not config.uses_legacy_multistream()) and scenario in ["SingleStream"]: inferred = True # samples_per_query does not match with the one reported in the logs # when inferring MultiStream from SingleStream @@ -1207,25 +1546,30 @@ def check_performance_dir(config, model, path, scenario_fixed, division, system_ if uses_early_stopping: early_stopping_latency_ms = mlperf_log["early_stopping_latency_ms"] if early_stopping_latency_ms == 0: - log.error("Not enough samples were processed for early stopping to make an estimate") + log.error( + "Not enough samples were processed for early stopping to make an estimate" + ) is_valid = False res = (early_stopping_latency_ms * samples_per_query) / MS_TO_NS else: res = (latency_99_percentile * samples_per_query) / MS_TO_NS - is_network_system, is_network_mode_valid = is_system_over_network(division, system_json, path) + is_network_system, is_network_mode_valid = is_system_over_network( + division, system_json, path) is_valid &= is_network_mode_valid if is_network_system: # for network mode verify the SUT name is valid, accodring to the rules (must include "Network SUT" in name) if NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME not in sut_name: log.error( - f"{fname} invalid sut name for network mode. expecting the substring '{NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME}' got '{sut_name}'") + f"{fname} invalid sut name for network mode. 
expecting the substring '{NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME}' got '{sut_name}'" + ) is_valid = False return is_valid, res, inferred -def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, config): +def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, + config): more_power_check = config.more_power_check @@ -1234,11 +1578,13 @@ def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, conf # check if all the required files are present required_files = REQUIRED_PERF_FILES + REQUIRED_PERF_POWER_FILES - diff = files_diff(list_files(testing_path), required_files, OPTIONAL_PERF_FILES) + diff = files_diff( + list_files(testing_path), required_files, OPTIONAL_PERF_FILES) if diff: log.error("%s has file list mismatch (%s)", testing_path, diff) is_valid = False - diff = files_diff(list_files(ranging_path), required_files, OPTIONAL_PERF_FILES) + diff = files_diff( + list_files(ranging_path), required_files, OPTIONAL_PERF_FILES) if diff: log.error("%s has file list mismatch (%s)", ranging_path, diff) is_valid = False @@ -1260,9 +1606,11 @@ def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, conf client_timezone = datetime.timedelta(seconds=json.load(f)["timezone"]) detail_log_fname = os.path.join(testing_path, "mlperf_log_detail.txt") mlperf_log = MLPerfLog(detail_log_fname) - datetime_format = '%m-%d-%Y %H:%M:%S.%f' - power_begin = datetime.datetime.strptime(mlperf_log["power_begin"], datetime_format) + client_timezone - power_end = datetime.datetime.strptime(mlperf_log["power_end"], datetime_format) + client_timezone + datetime_format = "%m-%d-%Y %H:%M:%S.%f" + power_begin = datetime.datetime.strptime(mlperf_log["power_begin"], + datetime_format) + client_timezone + power_end = datetime.datetime.strptime(mlperf_log["power_end"], + datetime_format) + client_timezone # Obtain the scenario also from logs to check if power is inferred if config.has_new_logging_format(): scenario = mlperf_log["effective_scenario"] @@ -1277,16 +1625,18 @@ def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, conf m = re.match(r"^\s*([\w\s.\(\)\/]+)\s*\:\s*([\w\+\.][\w\+\.\s]*)", line) if m: rt[m.group(1).strip()] = m.group(2).strip() - scenario = rt["Scenario"].replace(" ","") + scenario = rt["Scenario"].replace(" ", "") spl_fname = os.path.join(testing_path, "spl.txt") power_list = [] with open(spl_fname) as f: for line in f: - timestamp = datetime.datetime.strptime(line.split(",")[1], datetime_format) + server_timezone + timestamp = datetime.datetime.strptime( + line.split(",")[1], datetime_format) + server_timezone if timestamp > power_begin and timestamp < power_end: power_list.append(float(line.split(",")[3])) if len(power_list) == 0: - log.error("%s has no power samples falling in power range: %s - %s", spl_fname, power_begin, power_end) + log.error("%s has no power samples falling in power range: %s - %s", + spl_fname, power_begin, power_end) is_valid = False else: avg_power = sum(power_list) / len(power_list) @@ -1296,7 +1646,8 @@ def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, conf power_metric = avg_power else: # In SingleStream and MultiStream scenarios, the power metric is in J/query. 
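# --- Editorial sketch (not part of the patch) -------------------------------
# Numeric illustration of the J/query metric mentioned in the comment above and
# computed further down as power_metric = avg_power * power_duration /
# num_queries.  All values are made up; power_duration is assumed to be the
# length of the power_begin..power_end window in seconds.
avg_power = 250.0        # mean of the spl.txt samples inside the power window, in W
power_duration = 600.0   # assumed window length in seconds
num_queries = 120000     # result_query_count from the detail log (v2.0 and later)
power_metric = avg_power * power_duration / num_queries
print("energy per query: %.3f J" % power_metric)  # 1.250 J
# -----------------------------------------------------------------------------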
- assert scenario_fixed in ["MultiStream", "SingleStream"], "Unknown scenario: {:}".format(scenario_fixed) + assert scenario_fixed in ["MultiStream", "SingleStream" + ], "Unknown scenario: {:}".format(scenario_fixed) if not config.has_query_count_in_log(): # Before v2.0, LoadGen does NOT print out the actual number of queries in detail logs. There is a # "generated_query_count", but LoadGen exits early when the min_duration has been met, so it is not equal to @@ -1306,24 +1657,35 @@ def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, conf # to 600-sec total runtime and can be offsetted by removing the "+1" when reconstructing the sample_count). # As for MultiStream, it always runs for 270336 queries, so using "generated_query_count" as above is fine. if scenario_fixed in ["MultiStream"]: - num_queries = mlperf_log["generated_query_count"] * mlperf_log["generated_samples_per_query"] + num_queries = mlperf_log["generated_query_count"] * mlperf_log[ + "generated_samples_per_query"] elif scenario_fixed in ["SingleStream"]: - num_queries = mlperf_log["result_qps_with_loadgen_overhead"] * power_duration + num_queries = mlperf_log[ + "result_qps_with_loadgen_overhead"] * power_duration else: # Starting from v2.0, LoadGen logs the actual number of issued queries. num_queries = int(mlperf_log["result_query_count"]) power_metric = avg_power * power_duration / num_queries - if (scenario_fixed in ["MultiStream"] and not config.uses_legacy_multistream()) and scenario in ["SingleStream"]: + if (scenario_fixed in ["MultiStream"] and + not config.uses_legacy_multistream()) and scenario in [ + "SingleStream" + ]: samples_per_query = 8 power_metric = avg_power * power_duration * samples_per_query / num_queries if more_power_check: python_version_major = int(sys.version.split(" ")[0].split(".")[0]) python_version_minor = int(sys.version.split(" ")[0].split(".")[1]) - assert python_version_major == 3 and python_version_minor >= 7, "The --more-power-check only supports Python 3.7+" + assert python_version_major == 3 and python_version_minor >= 7, ("The " + "--more-power-check" + " only " + "supports " + "Python " + "3.7+") assert os.path.exists(os.path.join(submission_checker_dir, "power-dev", "compliance", "check.py")), \ - "Please run 'git submodule update --init tools/submission/power-dev' to get Power WG's check.py." 
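# --- Editorial sketch (not part of the patch) -------------------------------
# How one spl.txt sample is parsed by the loop earlier in this hunk: the
# timestamp lives in comma-separated field 1 and the power reading in field 3.
# The sample line and the zero timezone offset are made up.
import datetime

datetime_format = "%m-%d-%Y %H:%M:%S.%f"
server_timezone = datetime.timedelta(seconds=0)
line = "Time,10-21-2022 14:05:00.000,Watts,251.3"
timestamp = datetime.datetime.strptime(
    line.split(",")[1], datetime_format) + server_timezone
watts = float(line.split(",")[3])
print(timestamp, watts)  # sample is kept if power_begin < timestamp < power_end
# -----------------------------------------------------------------------------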
+ ("Please run 'git submodule update --init tools/submission/power-dev' " + "to get Power WG's check.py.") sys.path.insert(0, os.path.join(submission_checker_dir, "power-dev")) from compliance.check import check as check_power_more perf_path = os.path.dirname(power_path) @@ -1337,7 +1699,6 @@ def check_power_dir(power_path, ranging_path, testing_path, scenario_fixed, conf return is_valid, power_metric - def files_diff(list1, list2, optional=None): """returns a list of files that are missing or added.""" if not optional: @@ -1345,26 +1706,36 @@ def files_diff(list1, list2, optional=None): optional = optional + ["mlperf_log_trace.json", "results.json", ".gitkeep"] return set(list1).symmetric_difference(set(list2)) - set(optional) + def is_system_over_network(division, system_json, path): """ - Verify whether the submitted system is over network and whether it is valid for the division + Verify whether the submitted system is over network and whether it is valid + for the division for 'network' division, it is mandatory that the system is over-network for 'closed' division, the system must not be over-network for 'open' division, the system may be either local or over-network """ is_network_mode_sys_spec_str = system_json.get(SYSTEM_DESC_IS_NETWORK_MODE) - is_network_system = is_network_mode_sys_spec_str.lower()=="true" if is_network_mode_sys_spec_str is not None else False + is_network_system = is_network_mode_sys_spec_str.lower( + ) == "true" if is_network_mode_sys_spec_str is not None else False # verify that the system corresponds the division is_valid = True expected_state_by_division = {"network": True, "closed": False} if division in expected_state_by_division: is_valid = expected_state_by_division[division] is is_network_system if not is_valid: - log.error(f"{path} incorrect network mode (={is_network_system}) for division '{division}'") + log.error( + f"{path} incorrect network mode (={is_network_system}) for division '{division}'" + ) return is_network_system, is_valid -def check_results_dir(config, filter_submitter, skip_compliance, csv, debug=False): + +def check_results_dir(config, + filter_submitter, + skip_compliance, + csv, + debug=False): """ Walk the results directory and do the checking. @@ -1386,7 +1757,8 @@ def check_results_dir(config, filter_submitter, skip_compliance, csv, debug=Fal check the performance directory check the accuracy directory if all was good, add the result to the results directory - if there are errors write a None as result so we can report later what failed + if there are errors write a None as result so we can report later what + failed """ head = [ "Organization", "Availability", "Division", "SystemType", "SystemName", @@ -1495,9 +1867,11 @@ def log_result(submitter, # # check if system_id is good. 
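# --- Editorial sketch (not part of the patch) -------------------------------
# The division rules enforced by is_system_over_network() defined above:
# 'network' submissions must be over the network, 'closed' must not be, and
# 'open' may be either.  The helper name and the sample calls are made up; the
# dictionary and the comparison mirror the checker code.
def network_mode_ok(division, is_network_system):
    expected_state_by_division = {"network": True, "closed": False}
    if division in expected_state_by_division:
        return expected_state_by_division[division] is is_network_system
    return True

print(network_mode_ok("closed", False))   # True
print(network_mode_ok("network", False))  # False
print(network_mode_ok("open", True))      # True
# -----------------------------------------------------------------------------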
# - system_id_json = os.path.join(division, submitter, "systems", system_desc + ".json") + system_id_json = os.path.join(division, submitter, "systems", + system_desc + ".json") if not os.path.exists(system_id_json): - log.error("no system_desc for %s/%s/%s", division, submitter, system_desc) + log.error("no system_desc for %s/%s/%s", division, submitter, + system_desc) results[os.path.join(results_path, system_desc)] = None continue @@ -1515,11 +1889,13 @@ def log_result(submitter, if config.version not in ["v0.7"]: valid_system_types += ["datacenter,edge", "edge,datacenter"] if system_type not in valid_system_types: - log.error("%s has invalid system type (%s)", system_id_json, system_type) + log.error("%s has invalid system type (%s)", system_id_json, + system_type) results[name] = None continue config.set_type(system_type) - if not check_system_desc_id(name, system_json, submitter, division, config.version): + if not check_system_desc_id(name, system_json, submitter, division, + config.version): results[name] = None continue @@ -1536,8 +1912,8 @@ def log_result(submitter, if is_closed_or_network and mlperf_model not in config.models: # for closed/network divisions we want the model name to match. # for open division the model_name might be different than the task - log.error("%s has an invalid model %s for closed/network division", name, - model_name) + log.error("%s has an invalid model %s for closed/network division", + name, model_name) results[name] = None continue @@ -1552,7 +1928,9 @@ def log_result(submitter, continue errors = 0 - all_scenarios = set(list(required_scenarios) + list(config.get_optional(mlperf_model))) + all_scenarios = set( + list(required_scenarios) + + list(config.get_optional(mlperf_model))) for scenario in list_dir(results_path, system_desc, model_name): # some submissions in v0.5 use lower case scenarios - map them for now scenario_fixed = SCENARIO_MAPPING.get(scenario, scenario) @@ -1562,7 +1940,9 @@ def log_result(submitter, name = os.path.join(results_path, system_desc, model_name, scenario) results[name] = None if is_closed_or_network and scenario_fixed not in all_scenarios: - log.warning("%s ignoring scenario %s (neither required nor optional)", name, scenario) + log.warning( + "%s ignoring scenario %s (neither required nor optional)", + name, scenario) continue # check if measurement_dir is good. 
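# --- Editorial sketch (not part of the patch) -------------------------------
# The lower-case scenario normalization used earlier in check_results_dir via
# scenario_fixed = SCENARIO_MAPPING.get(scenario, scenario).  Only the
# "singlestream" entry of SCENARIO_MAPPING is visible in this diff; the other
# entries below are assumed to follow the same pattern.
SCENARIO_MAPPING = {
    "singlestream": "SingleStream",
    "multistream": "MultiStream",
    "server": "Server",
    "offline": "Offline",
}
for scenario in ["offline", "Offline", "SingleStream"]:
    print(scenario, "->", SCENARIO_MAPPING.get(scenario, scenario))
# -----------------------------------------------------------------------------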
@@ -1574,7 +1954,8 @@ def log_result(submitter, errors += 1 else: if not check_measurement_dir(measurement_dir, name, system_desc, - os.path.join(division, submitter), model_name, scenario): + os.path.join(division, submitter), + model_name, scenario): log.error("%s measurement_dir has issues", measurement_dir) # results[measurement_dir] = None errors += 1 @@ -1592,10 +1973,12 @@ def log_result(submitter, diff = files_diff(list_files(acc_path), REQUIRED_ACC_FILES) if diff: log.error("%s has file list mismatch (%s)", acc_path, diff) - accuracy_is_valid, acc = check_accuracy_dir(config, mlperf_model, acc_path, debug or is_closed_or_network) + accuracy_is_valid, acc = check_accuracy_dir( + config, mlperf_model, acc_path, debug or is_closed_or_network) if not accuracy_is_valid and not is_closed_or_network: if debug: - log.warning("%s, accuracy not valid but taken for open", acc_path) + log.warning("%s, accuracy not valid but taken for open", + acc_path) accuracy_is_valid = True if not accuracy_is_valid: # a little below we'll not copy this into the results csv @@ -1623,34 +2006,46 @@ def log_result(submitter, required_perf_files = REQUIRED_PERF_FILES + REQUIRED_PERF_POWER_FILES else: required_perf_files = REQUIRED_PERF_FILES - diff = files_diff(list_files(perf_path), required_perf_files, OPTIONAL_PERF_FILES) + diff = files_diff( + list_files(perf_path), required_perf_files, + OPTIONAL_PERF_FILES) if diff: log.error("%s has file list mismatch (%s)", perf_path, diff) try: - is_valid, r, is_inferred = check_performance_dir(config, mlperf_model, perf_path, scenario_fixed, division, system_json) + is_valid, r, is_inferred = check_performance_dir( + config, mlperf_model, perf_path, scenario_fixed, division, + system_json) if is_inferred: inferred = 1 log.info("%s has inferred results, qps=%s", perf_path, r) except Exception as e: - log.error("%s caused exception in check_performance_dir: %s", perf_path, e) + log.error("%s caused exception in check_performance_dir: %s", + perf_path, e) is_valid, r = False, None power_metric = 0 if has_power: try: ranging_path = os.path.join(name, "performance", "ranging") - power_is_valid, power_metric = check_power_dir(power_path, ranging_path, perf_path, scenario_fixed, + power_is_valid, power_metric = check_power_dir( + power_path, ranging_path, perf_path, scenario_fixed, config) if not power_is_valid: is_valid = False power_metric = 0 except Exception as e: - log.error("%s caused exception in check_power_dir: %s", perf_path, e) + log.error("%s caused exception in check_power_dir: %s", + perf_path, e) is_valid, r, power_metric = False, None, 0 if is_valid: - results[name] = r if r is None or power_metric == 0 else "{:f} with power_metric = {:f}".format(r, power_metric) + results[ + name] = r if r is None or power_metric == 0 else ("{:f} " + "with " + "power_metric" + " = {:f}").format( + r, power_metric) required_scenarios.discard(scenario_fixed) else: log.error("%s has issues", perf_path) @@ -1665,7 +2060,9 @@ def log_result(submitter, log.error("no compliance dir for %s", name) results[name] = None else: - if not check_compliance_dir(compliance_dir, mlperf_model, scenario_fixed, config, division, system_json): + if not check_compliance_dir(compliance_dir, mlperf_model, + scenario_fixed, config, division, + system_json): log.error("compliance dir %s has issues", compliance_dir) results[name] = None else: @@ -1673,8 +2070,25 @@ def log_result(submitter, if results.get(name): if accuracy_is_valid: - log_result(submitter, available, division, system_type, 
system_json.get("system_name"), system_desc, model_name, mlperf_model, - scenario_fixed, r, acc, system_json, name, compliance, errors, config, inferred=inferred, power_metric=power_metric) + log_result( + submitter, + available, + division, + system_type, + system_json.get("system_name"), + system_desc, + model_name, + mlperf_model, + scenario_fixed, + r, + acc, + system_json, + name, + compliance, + errors, + config, + inferred=inferred, + power_metric=power_metric) else: results[name] = None log.error("%s is OK but accuracy has issues", name) @@ -1683,9 +2097,11 @@ def log_result(submitter, name = os.path.join(results_path, system_desc, model_name) if is_closed_or_network: results[name] = None - log.error("%s does not have all required scenarios, missing %s", name, required_scenarios) + log.error("%s does not have all required scenarios, missing %s", + name, required_scenarios) elif debug: - log.warning("%s ignoring missing scenarios in open division (%s)", name, required_scenarios) + log.warning("%s ignoring missing scenarios in open division (%s)", + name, required_scenarios) return results @@ -1698,7 +2114,8 @@ def check_system_desc_id(fname, systems_json, submitter, division, version): else: required_fields = SYSTEM_DESC_REQUIRED_FIELDS + SYSTEM_DESC_REQUIED_FIELDS_SINCE_V1 - is_network_system, is_network_mode_valid = is_system_over_network(division, systems_json, fname) + is_network_system, is_network_mode_valid = is_system_over_network( + division, systems_json, fname) is_valid &= is_network_mode_valid if is_network_system: required_fields += SYSTEM_DESC_REQUIRED_FIELDS_NETWORK_MODE @@ -1719,15 +2136,18 @@ def check_system_desc_id(fname, systems_json, submitter, division, version): log.warning("%s, field %s is unknown", fname, k) if systems_json.get("submitter").lower() != submitter.lower(): - log.error("%s has submitter %s, directory has %s", fname, systems_json.get("submitter"), submitter) + log.error("%s has submitter %s, directory has %s", fname, + systems_json.get("submitter"), submitter) is_valid = False if systems_json.get("division") != division: - log.error("%s has division %s, division has %s", fname, systems_json.get("division"), division) + log.error("%s has division %s, division has %s", fname, + systems_json.get("division"), division) is_valid = False return is_valid -def check_measurement_dir(measurement_dir, fname, system_desc, root, model, scenario): +def check_measurement_dir(measurement_dir, fname, system_desc, root, model, + scenario): files = list_files(measurement_dir) system_file = None is_valid = True @@ -1772,6 +2192,7 @@ def check_measurement_dir(measurement_dir, fname, system_desc, root, model, scen return is_valid + def check_compliance_perf_dir(test_dir): is_valid = False @@ -1804,6 +2225,7 @@ def check_compliance_perf_dir(test_dir): return is_valid + def check_compliance_acc_dir(test_dir, model, config): is_valid = False acc_passed = False @@ -1829,7 +2251,9 @@ def check_compliance_acc_dir(test_dir, model, config): log.error("%s has no accuracy directory", test_dir) is_valid = False else: - diff = files_diff(list_files(test_acc_path), REQUIRED_TEST01_ACC_FILES_1 if acc_passed else REQUIRED_TEST01_ACC_FILES) + diff = files_diff( + list_files(test_acc_path), REQUIRED_TEST01_ACC_FILES_1 + if acc_passed else REQUIRED_TEST01_ACC_FILES) if diff: log.error("%s has file list mismatch (%s)", test_acc_path, diff) is_valid = False @@ -1842,12 +2266,18 @@ def check_compliance_acc_dir(test_dir, model, config): else: required_delta_perc = 0.1 acc_baseline = 
acc_compliance = 0 - with open(os.path.join(test_acc_path, "baseline_accuracy.txt"), "r", encoding="utf-8") as f: + with open( + os.path.join(test_acc_path, "baseline_accuracy.txt"), + "r", + encoding="utf-8") as f: for line in f: m = re.match(pattern, line) if m: acc_baseline = float(m.group(1)) - with open(os.path.join(test_acc_path, "compliance_accuracy.txt"), "r", encoding="utf-8") as f: + with open( + os.path.join(test_acc_path, "compliance_accuracy.txt"), + "r", + encoding="utf-8") as f: for line in f: m = re.match(pattern, line) if m: @@ -1863,13 +2293,18 @@ def check_compliance_acc_dir(test_dir, model, config): return is_valid -def check_compliance_dir(compliance_dir, model, scenario, config, division, system_json): + +def check_compliance_dir(compliance_dir, model, scenario, config, division, + system_json): compliance_perf_pass = True compliance_perf_dir_pass = True compliance_acc_pass = True test_list = ["TEST01", "TEST04", "TEST05"] - if model in ["rnnt", "bert-99", "bert-99.9", "dlrm-99", "dlrm-99.9", "3d-unet-99", "3d-unet-99.9", "retinanet"]: + if model in [ + "rnnt", "bert-99", "bert-99.9", "dlrm-99", "dlrm-99.9", "3d-unet-99", + "3d-unet-99.9", "retinanet" + ]: test_list.remove("TEST04") #Check performance of all Tests @@ -1880,32 +2315,40 @@ def check_compliance_dir(compliance_dir, model, scenario, config, division, syst compliance_perf_dir_pass = False else: try: - compliance_perf_dir = os.path.join(compliance_dir, test, "performance","run_1") - compliance_perf_valid, r, is_inferred = check_performance_dir(config, model, compliance_perf_dir, scenario, division, system_json) + compliance_perf_dir = os.path.join(compliance_dir, test, "performance", + "run_1") + compliance_perf_valid, r, is_inferred = check_performance_dir( + config, model, compliance_perf_dir, scenario, division, system_json) if is_inferred: log.info("%s has inferred results, qps=%s", compliance_perf_dir, r) except Exception as e: - log.error("%s caused exception in check_performance_dir: %s", compliance_perf_dir, e) + log.error("%s caused exception in check_performance_dir: %s", + compliance_perf_dir, e) is_valid, r = False, None - compliance_perf_pass = compliance_perf_pass and check_compliance_perf_dir(test_dir) and compliance_perf_valid - - + compliance_perf_pass = compliance_perf_pass and check_compliance_perf_dir( + test_dir) and compliance_perf_valid #Check accuracy for TEST01 - compliance_acc_pass = check_compliance_acc_dir(os.path.join(compliance_dir, "TEST01"), model, config) + compliance_acc_pass = check_compliance_acc_dir( + os.path.join(compliance_dir, "TEST01"), model, config) return compliance_perf_pass and compliance_acc_pass and compliance_perf_dir_pass + def main(): args = get_args() - config = Config(args.version, args.extra_model_benchmark_map, ignore_uncommited=args.submission_exceptions, + config = Config( + args.version, + args.extra_model_benchmark_map, + ignore_uncommited=args.submission_exceptions, more_power_check=args.more_power_check) with open(args.csv, "w") as csv: os.chdir(args.input) # check results directory - results = check_results_dir(config, args.submitter, args.skip_compliance, csv, args.debug) + results = check_results_dir(config, args.submitter, args.skip_compliance, + csv, args.debug) # log results log.info("---") @@ -1921,7 +2364,8 @@ def main(): # print summary log.info("---") - log.info("Results=%d, NoResults=%d", with_results, len(results) - with_results) + log.info("Results=%d, NoResults=%d", with_results, + len(results) - with_results) if len(results) != 
with_results: log.error("SUMMARY: submission has errors") return 1
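# --- Editorial usage note (not part of the patch) ----------------------------
# A typical invocation of the checker with the flags defined in get_args()
# above.  The repository path and submitter name are placeholders.
#
#   python3 submission-checker.py \
#       --input /path/to/submission-repo \
#       --version v2.1 \
#       --submitter MyOrg \
#       --csv summary.csv \
#       --more-power-check
# ------------------------------------------------------------------------------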