Skip to content

Commit

Permalink
Merge branch 'det-tst' of https://github.com/gidden/cycamore into gid…
Browse files Browse the repository at this point in the history
…den-det-tst
  • Loading branch information
scopatz committed Feb 9, 2014
2 parents 1e9431e + 726403f commit cf68e22
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 29 deletions.
12 changes: 12 additions & 0 deletions tests/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,15 @@ Finally, feel free to clean up after yourself
.. code-block:: bash
$ rm *.h5
Nondeterministic Analysis
==========================

An `analysis` python module can assist in analyzing the determinism of Cyclus
output. It does so by running the regression tests some number of times and
analyzing the frequency of nondeterminism of output tables and columns within
those tables. See the module's help:

.. code-block:: bash

    $ python analysis.py -h
122 changes: 122 additions & 0 deletions tests/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from __future__ import print_function
from __future__ import division

import sys
import time
import argparse as ap
import subprocess
from collections import defaultdict
from multiprocessing import Pool, Manager, cpu_count

import test_regression as tst

# markers emitted on stdout by cyclus_tools when a deterministic
# comparison finds a difference
diff_tbl = """table is different"""
diff_col = """Column"""

def collect(args):
    """Runs one deterministic regression-test pass and records differences.

    Parameters
    ----------
    args : tuple of (tbl_freq, col_freq)
        tbl_freq : mutable mapping of table name -> number of runs in
            which that table differed (shared Manager dict)
        col_freq : mutable list of (table name, column name) pairs, one
            appended per differing-column report (shared Manager list)

    The regression suite is re-run in a child interpreter with
    deterministic checking enabled; its stdout is scanned for the
    difference markers above.
    """
    tbl_freq, col_freq = args

    # sys.executable guarantees the same interpreter even where no bare
    # "python" is on PATH; universal_newlines decodes stdout to str so the
    # line splitting below works on both py2 and py3
    rtn = subprocess.Popen(
        [sys.executable, "-c",
         "import test_regression as t; " +
         "t.setup(); t.test_regression(check_deterministic=True)"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
    out, err = rtn.communicate()

    # tracks the most recently reported differing table; column reports
    # are attributed to it, so guard against a column line arriving first
    tbl_name = None
    for line in out.split("\n"):
        line = line.strip()
        if diff_tbl in line:
            tbl_name = line.split()[0]
            tbl_freq[tbl_name] = \
                tbl_freq[tbl_name] + 1 if tbl_name in tbl_freq else 1
        if diff_col in line and tbl_name is not None:
            col_name = line.split()[1]
            col_freq.append((tbl_name, col_name))

def proxy_lst_to_dict(lst):
    """Fold (table, column) occurrence pairs into nested count dicts.

    Parameters
    ----------
    lst : iterable of (str, str)
        (table name, column name) pairs, one per observed difference

    Returns
    -------
    counts : defaultdict
        counts[table][column] -> number of occurrences; missing keys
        read as zero-count entries
    """
    counts = defaultdict(lambda: defaultdict(int))
    for tbl_name, col_name in lst:
        counts[tbl_name][col_name] += 1
    return counts

def determ_analysis(niter=1000, fname="report"):
    """
    Calls deterministic regression tests for a number of iterations and reports
    findings of nondeterminism to a file.

    Parameters
    ----------
    niter : int
        The number of times to run regression tests
    fname : str
        The output filename to report to
    """
    m = Manager()

    # proxies shared with the worker processes running collect()
    tbl_freq = m.dict()
    col_freq = m.list()

    # leave one core free on multicore machines
    nproc = cpu_count()
    count = nproc if nproc == 1 else nproc - 1
    pool = Pool(count)

    print("Beginning iterations on " + str(nproc) + " processors.")
    args = ((tbl_freq, col_freq) for i in range(niter))
    jobs = pool.map_async(collect, args)
    while not jobs.ready():
        # NOTE(review): _number_left is an undocumented Pool internal that
        # counts pending task *chunks*, so this percentage is approximate
        print('{0:.1%} of jobs left to start.'.format(
            jobs._number_left / niter))
        time.sleep(5.0)
    pool.close()
    pool.join()
    print("Finished iterations.")

    # convert from manager proxies to plain local structures
    col_freq = proxy_lst_to_dict(col_freq)
    tbl_freq = {item[0]: item[1] for item in tbl_freq.items()}

    # normalize counts to frequency strings; .items() works on both py2
    # and py3, unlike the py2-only .iteritems() (this module already
    # imports __future__ print/division for py3 compatibility)
    for tbl, dic in col_freq.items():
        for col, freq in list(dic.items()):
            dic[col] = "{0:.2f}".format(float(freq) / tbl_freq[tbl])
    for k, v in list(tbl_freq.items()):
        tbl_freq[k] = "{0:.2f}".format(float(v) / niter)

    # report
    lines = []
    lines.append("Table values are reported as percent nondeterministic" +
                 " of total runs.\n\n")
    lines.append("Column values are reported as percent nondeterministic" +
                 " of all table nondeterminism occurrences.\n\n")
    if len(tbl_freq) == 0:
        lines.append("No nondeterminism found.")
    for tbl, freq in tbl_freq.items():
        lines.append(tbl + " " + freq + "\n")
        for col, cfreq in col_freq[tbl].items():
            lines.append(" " + col + " " + cfreq + "\n")
    with open(fname, "w") as f:
        f.writelines(lines)

def main():
    """Command-line entry point: parse options and run the analysis."""
    parser = ap.ArgumentParser(
        description="A module for analyzing the determinism of Cyclus output.")
    parser.add_argument(
        '-n', '--niterations', type=int, default=100,
        help='the number of regression test runs to perform')
    parser.add_argument(
        '--report', default='report',
        help='the file to write the report to')
    args = parser.parse_args()
    determ_analysis(args.niterations, args.report)

if __name__ == "__main__":
    main()
120 changes: 112 additions & 8 deletions tests/cyclus_tools.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from tools import check_cmd

from numpy import array_equal
import numpy as np
import tables

import visitors
Expand All @@ -14,13 +14,117 @@ def run_cyclus(cyclus, cwd, in_path, out_path):
cmd = [cyclus, "-o", out_path, "--input-file", in_path]
check_cmd(cmd, cwd, holdsrtn)

def db_comparator(path1, path2):
"""Compares two Cyclus HDF5 databases
Returns:
True or False. In case of False, it prints out the names
and differences in the compared databases.
"""
def compare_nondeterm(path1, path2):
    """Compares two Cyclus HDF5 databases, ignoring the nondeterministic
    assignment of AgentIDs and TransactionIDs.

    Returns
    -------
    rtn : bool
        True if the databases are equivalent once nondeterministic id
        assignments are accounted for.
    """
    walk_one = visitors.HDF5RegressionVisitor(path1).walk()
    walk_two = visitors.HDF5RegressionVisitor(path2).walk()
    return walk_one == walk_two

def compare_determ(path1, path2, verbose=False):
    """Compares two Cyclus HDF5 databases assuming deterministic AgentIDs and
    TransactionIDs.

    Parameters
    ----------
    path1 : str
        path to the first HDF5 database
    path2 : str
        path to the second HDF5 database
    verbose : bool
        if True, print a description of every difference found

    Returns
    -------
    rtn : bool
        True if both databases are identical other than their SimIDs
    """
    db_one = tables.open_file(path1, mode="r")
    db_two = tables.open_file(path2, mode="r")
    try:
        path_one = [node._v_pathname
                    for node in db_one.walk_nodes(classname="Table")]
        path_two = [node._v_pathname
                    for node in db_two.walk_nodes(classname="Table")]

        # Check if databases contain the same tables
        if not np.all(path_one == path_two):
            if verbose:
                print("The number or names of tables in databases are not the same.")
                print(path_one)
                print(path_two)
            return False

        dbs_same = True
        for path in path_one:
            data_one = db_one.get_node(path)[:]
            data_two = db_two.get_node(path)[:]
            # SimIDs legitimately differ between runs, so drop that column
            names = [name for name in data_one.dtype.names if name != "SimID"]
            data_one = data_one[names]
            data_two = data_two[names]

            if np.all(data_one == data_two):
                continue

            dbs_same = False
            if verbose:
                msg = ""
                msg += path.replace("/", "")
                msg += " table is different in the databases.\n"
                msg += determ_err_msg(names, data_one, data_two)
                print(msg)
        return dbs_same
    finally:
        # always release the file handles, even if comparison raises
        db_one.close()
        db_two.close()

def determ_err_msg(names, data_one, data_two):
    """Returns a string describing the deterministic difference between two
    databases.

    Parameters
    ----------
    names : list of str
        column names to compare
    data_one : numpy structured array
        table data from the first database
    data_two : numpy structured array
        table data from the second database

    Returns
    -------
    msg : str
        human-readable description of the differences; empty when every
        listed column compares equal
    """
    msg = ""
    # Investigation of the differences
    # check if the lengths are different
    if len(data_one) != len(data_two):
        msg += "Length mismatch: " + str(len(data_one)) + ", " + str(len(data_two))
    else:
        for name in names:
            column_one = data_one[name]
            column_two = data_two[name]
            # check if data types are the same
            if column_one.dtype != column_two.dtype:
                msg += "Datatypes in column " + name + " are different."
                msg += str(column_one.dtype)
                msg += str(column_two.dtype)
            elif not np.all(column_one == column_two):
                msg += "Column " + name
                diff = np.equal(column_one, column_two)
                # np.where returns a *tuple* of index arrays; the row
                # indices of the mismatches are its first element.  Using
                # len() on the tuple itself would always yield 1, which
                # made the "completely different" branch unreachable and
                # the percentage below meaningless.
                indices = np.where(diff == False)[0]
                # check if the whole column is different
                if len(indices) == len(column_one):
                    msg += " is completely different"
                else:
                    # provide mismatch percentage
                    mismatch = 100 * float(len(indices)) / len(column_one)
                    msg += " has a mismatch of"
                    msg += " {0:.2f}".format(mismatch) + "% \n"
                    msg += "Indices of different objects are:\n"
                    msg += str(indices) + "\n"
                    msg += "The different elements on these indices: \n"
                    msg += str(column_one[indices]) + "\n"
                    msg += str(column_two[indices]) + "\n"
    return msg
26 changes: 19 additions & 7 deletions tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import json
import hashlib
import urllib
import uuid
from nose.tools import assert_true
from cyclus_tools import run_cyclus, db_comparator
from cyclus_tools import run_cyclus, compare_determ, compare_nondeterm

sim_files = {}
fetchdir = "fetch"
Expand All @@ -18,7 +19,9 @@ def setup():
refs = json.load(f)
cyclus_ref = refs[-1]["cyclus-ref"]
cycamore_ref = refs[-1]["cycamore-ref"]
refs = [r for r in refs if r["cyclus-ref"] == cyclus_ref and r["cycamore-ref"] == cycamore_ref]
refs = [r for r in refs
if r["cyclus-ref"] == cyclus_ref
and r["cycamore-ref"] == cycamore_ref]
base_url = "http://regtests.fuelcycle.org/"
for r in refs:
fpath = os.path.join(fetchdir, r["fname"])
Expand All @@ -31,22 +34,31 @@ def setup():
raise RuntimeError("They tooks our data!!! All our rackspace are belong to them.")
sim_files[r["input-file"]] = fpath

def test_regression():
def test_regression(check_deterministic=False):
    """Test for all inputs in sim_files. Checks if reference and current cyclus
    output is the same.

    Parameters
    ----------
    check_deterministic : bool
        If True, also test deterministic equality of simulations

    WARNING: the tests require cyclus executable to be included in PATH
    """
    for root, dirs, files in os.walk("../input"):
        for f in files:
            if f not in sim_files:
                continue

            # unique output name so concurrent runs cannot clobber each
            # other's databases (stale merge leftovers removed: the dead
            # "tmp.h5" assignment and the call to the no-longer-imported
            # db_comparator)
            tmp_file = str(uuid.uuid4()) + ".h5"
            run_cyclus("cyclus", os.getcwd(), os.path.join(root, f), tmp_file)

            if os.path.isfile(tmp_file):
                nondeterm = compare_nondeterm(sim_files[f], tmp_file)
                if check_deterministic:
                    determ = compare_determ(sim_files[f], tmp_file,
                                            verbose=True)
                # clean up before asserting so a failure does not leak
                # the temporary database
                os.remove(tmp_file)

                assert_true(nondeterm)
                if check_deterministic:
                    assert_true(determ)
14 changes: 0 additions & 14 deletions tests/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,3 @@ def skip_then_continue(msg=""):
to this function.
"""
raise SkipTest(msg)

#
# Here there be Hackons!
#

# hack to make sure that we are actually in the tests dir when we start running
# tests. This works because this file is imported by many of the other test
# files.
# NOTE(review): this is an import-time side effect — merely importing this
# module changes the process's working directory; confirm no caller relies
# on the previous cwd.
_fdir = os.path.dirname(__file__)
if os.getcwd() != _fdir:
    os.chdir(_fdir)
# drop the temporary name so it does not leak into the module namespace
del _fdir


0 comments on commit cf68e22

Please sign in to comment.