Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New to old data converter #64

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Changes from 3 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
759c92d
first draft of new to old data converter
comane Apr 16, 2024
db610a8
added conversion from new data to old DATA_... format
comane Apr 27, 2024
302437b
kinematics written as numbers
comane Apr 29, 2024
ccd2d9c
fix typo
FrancescoMerlotti May 8, 2024
08c8894
add .pdf PBSP logos
FrancescoMerlotti May 13, 2024
18308d5
added load_commondata to core, level0_commondata_wc and make_level1_d…
FrancescoMerlotti May 14, 2024
db2124a
added parse_fakepdf to config.py
FrancescoMerlotti May 16, 2024
f5ba638
add chi2 provider functions
FrancescoMerlotti May 20, 2024
f3a6dd9
added usage write_chi2
FrancescoMerlotti May 20, 2024
3513ff1
fixed repo
FrancescoMerlotti May 20, 2024
dd605f9
moved function in simunet_analysis & changed their name
FrancescoMerlotti May 26, 2024
c9cb4cb
changed cuts to commondata_table_indices
FrancescoMerlotti May 30, 2024
2caf4cb
changed cuts to commondata_table_indices
FrancescoMerlotti May 30, 2024
664a69b
added rules classes, static KIN_LABEL dict, and replaced cpp Export m…
FrancescoMerlotti Jun 12, 2024
2deae97
added commondatawriter.py & export method for CommonData python objects
FrancescoMerlotti Jun 12, 2024
b34de76
added xq2 map for hadronic MQQ processes ref. [2303.06159]
FrancescoMerlotti Jun 12, 2024
98f9c77
Revert "added xq2 map for hadronic MQQ processes ref. [2303.06159]"
FrancescoMerlotti Jun 18, 2024
cd67771
Revert "added commondatawriter.py & export method for CommonData pyth…
FrancescoMerlotti Jun 18, 2024
9c32bea
Revert "added rules classes, static KIN_LABEL dict, and replaced cpp …
FrancescoMerlotti Jun 18, 2024
387a866
debug convert_new_data_to_old
FrancescoMerlotti Jun 25, 2024
b65ceb6
added test_utils to .gitignore
FrancescoMerlotti Jun 25, 2024
069d827
tested writer
FrancescoMerlotti Jul 4, 2024
ce8d085
changes in utils are useful for data converter, changes in dataplots …
FrancescoMerlotti Jul 11, 2024
b55a5e2
scripts to convert and test the conversion
FrancescoMerlotti Oct 24, 2024
d1b60f4
make plotting files script (works on dis datasets)
FrancescoMerlotti Oct 25, 2024
b4eaf59
corrected bug
FrancescoMerlotti Oct 25, 2024
3d610ab
added cuts to fixed sm predictions
FrancescoMerlotti Nov 4, 2024
ed8fb57
update of commondata converter picking the right files
FrancescoMerlotti Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 139 additions & 0 deletions validphys2/src/validphys/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import numpy as np

from validobj import parse_input, ValidationError
from reportengine.compat import yaml


def parse_yaml_inp(inp, spec, path):
Expand Down Expand Up @@ -219,3 +220,141 @@ def scale_from_grid(grid):
Returns ``'linear'`` if the scale of the grid object is linear,
and otherwise ``' log'``."""
return 'linear' if grid.scale == 'linear' else 'log'


def uncertainty_yaml_to_systype(path_uncertainty_yaml, name_dataset, path_systype=None, write_to_file=True):
    """
    Convert the new style uncertainty yaml file to the old style systype.

    The output file is named ``SYSTYPE_<name_dataset>_DEFAULT.dat``. Its first
    line holds the number of systematics; it is followed by one line per
    systematic with its 1-based index, its ``treatment`` and its ``type``.
    The ``stat`` entry of the definitions, if present, is not counted as a
    systematic and is not written.

    Parameters
    ----------
    path_uncertainty_yaml : str, or Path
        Path to the new style uncertainty yaml file to be converted

    name_dataset : str
        Name of the dataset, used to build the name of the output file

    path_systype : str, or Path, optional
        Directory in which the systype file is written. If None, the file
        is written in the directory containing the uncertainty yaml file.

    write_to_file : bool, optional
        If False, no file is written and only the number of systematics
        is computed and returned.

    Returns
    -------
    n_sys : int
        Number of systematics in the systype file
    """
    # open the uncertainty yaml file
    with open(path_uncertainty_yaml) as f:
        uncertainty = yaml.safe_load(f)

    # stat is not included in the sys: filter it out instead of popping it
    # from the loaded mapping
    systematics = {
        name: definition
        for name, definition in uncertainty['definitions'].items()
        if name != 'stat'
    }
    n_sys = len(systematics)

    if not write_to_file:
        return n_sys

    # if no output directory is provided, save next to the uncertainty yaml file
    if path_systype is None:
        output_dir = pathlib.Path(path_uncertainty_yaml).parent
    else:
        output_dir = pathlib.Path(path_systype)
    systype_file = output_dir / f"SYSTYPE_{name_dataset}_DEFAULT.dat"

    # Build every line before opening the file, so that a malformed entry
    # (e.g. a missing 'treatment' key) cannot leave a truncated file behind.
    # header: number of sys
    lines = [f"{n_sys}\n"]
    # NOTE(review): the original comment states that four spaces seem to be
    # the standard separator (checked only for CMS_1JET_8TEV), while the format
    # string visible here uses single spaces - confirm the expected layout.
    lines.extend(
        f"{index} {definition['treatment']} {definition['type']}\n"
        for index, definition in enumerate(systematics.values(), start=1)
    )

    with open(systype_file, 'w') as stream:
        stream.writelines(lines)

    return n_sys


def convert_new_data_to_old(path_data_yaml, path_uncertainty_yaml, path_kinematics, path_metadata, name_dataset, path_DATA=None):
    """
    Convert the new data format into the old data format

    Reads the data, uncertainty, kinematics and metadata yaml files and writes
    a ``DATA_<name_dataset>.dat`` file. The first line of the output holds the
    dataset name, the number of systematics and the number of data points.
    Each following line describes one data point: 1-based index, process type,
    the ``mid`` value of three kinematic variables, central value, statistical
    uncertainty and, for every systematic of that bin, its additive and
    multiplicative value.

    Parameters
    ----------
    path_data_yaml : str, or Path
        Path to the yaml file holding the ``data_central`` values

    path_uncertainty_yaml : str, or Path
        Path to the uncertainty yaml file (``definitions`` and ``bins``)

    path_kinematics : str, or Path
        Path to the kinematics yaml file (``bins``)

    path_metadata : str, or Path
        Path to the metadata yaml file; the process type is read from the
        first entry of ``implemented_observables``

    name_dataset : str
        Name of the dataset, used in the header and in the output file name

    path_DATA : str, or Path, optional
        Directory in which the DATA file is written. If None, the file is
        written in the directory containing the uncertainty yaml file.

    Raises
    ------
    ValueError
        If data, uncertainty and kinematics files do not describe the same
        number of bins, or if a systematic has a treatment other than
        ``ADD`` or ``MULT``.
    """
    # open the metadata yaml file
    with open(path_metadata) as f:
        metadata = yaml.safe_load(f)

    # open the data yaml file
    with open(path_data_yaml) as f:
        data = yaml.safe_load(f)

    # open the uncertainty yaml file
    with open(path_uncertainty_yaml) as f:
        uncertainty = yaml.safe_load(f)

    # open the kinematics yaml file
    with open(path_kinematics) as f:
        kinematics = yaml.safe_load(f)

    # get uncertainty definitions and values, data values and kinematics
    uncertainty_definitions = uncertainty['definitions']
    uncertainty_values = uncertainty['bins']
    data_values = data['data_central']
    kin_values = kinematics['bins']

    # zip() would silently drop trailing bins, leaving the header count
    # inconsistent with the number of rows written
    if not (len(data_values) == len(uncertainty_values) == len(kin_values)):
        raise ValueError(
            f"Inconsistent number of bins for {name_dataset}: "
            f"{len(data_values)} data points, {len(uncertainty_values)} uncertainty bins, "
            f"{len(kin_values)} kinematic bins"
        )

    n_sys = uncertainty_yaml_to_systype(path_uncertainty_yaml, name_dataset, write_to_file=False)
    # a bin without a 'stat' entry gets a zero statistical uncertainty, in line
    # with uncertainty_yaml_to_systype accepting definitions without 'stat'
    stats = np.array([entry.get('stat', 0.0) for entry in uncertainty_values])

    process_type = metadata['implemented_observables'][0]['process_type']

    # if no output directory is provided, save next to the uncertainty yaml file
    if path_DATA is None:
        output_dir = pathlib.Path(path_uncertainty_yaml).parent
    else:
        output_dir = pathlib.Path(path_DATA)
    data_file = output_dir / f"DATA_{name_dataset}.dat"

    # the kinematic columns are written in reverse order of their appearance
    # in the first bin; this assumes every bin has the same three variables
    kin_names = list(kin_values[0].keys()) if kin_values else []

    # header: Dataset name, number of sys errors, and number of data points, whitespace separated
    lines = [f"{name_dataset} {n_sys} {len(data_values)}\n"]

    rows = zip(data_values, stats, kin_values, uncertainty_values)
    for i, (data_value, stat, kin, bin_uncertainties) in enumerate(rows, start=1):
        cd_line = f"{i}\t {process_type}\t {kin[kin_names[2]]['mid']}\t {kin[kin_names[1]]['mid']}\t {kin[kin_names[0]]['mid']}\t {data_value}\t {stat}\t"

        # only the uncertainties of the current bin belong on this row
        sys_columns = []
        for sys_name, sys_val in bin_uncertainties.items():
            if sys_name == 'stat':
                continue

            # NOTE(review): a collaborator commented on the pull request that
            # this branching should be removed, "since the systematics in the
            # new format are all stored as additive". The branching is kept
            # here unchanged pending confirmation.
            treatment = uncertainty_definitions[sys_name]['treatment']
            if treatment == "ADD":
                add_sys = sys_val
                mult_sys = add_sys * 100.0 / data_value if data_value != 0.0 else 0.0
            elif treatment == "MULT":
                mult_sys = sys_val
                add_sys = mult_sys * data_value / 100.0
            else:
                raise ValueError(
                    f"Unknown treatment {treatment!r} for systematic {sys_name!r} in {name_dataset}"
                )

            sys_columns.append(f"{add_sys}\t {mult_sys}")

        # exactly one newline per data point, whatever the position of 'stat'
        lines.append(cd_line + "\t".join(sys_columns) + "\n")

    # write everything at once so that an error above cannot leave a partial file
    with open(data_file, 'w') as stream:
        stream.writelines(lines)



if __name__ == '__main__':
    # Ad-hoc command-line entry point: writes the SYSTYPE and DATA files of one
    # dataset next to its uncertainty yaml file. The defaults reproduce the
    # paths that were previously hard-coded, so running the module without
    # arguments behaves as before.
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert a new-format commondata folder to the old SYSTYPE/DATA files"
    )
    parser.add_argument(
        "--commondata-dir",
        default="/Users/markcostantini/codes/nnpdfgit/nnpdf/nnpdf_data/nnpdf_data/new_commondata/CMS_1JET_8TEV",
        help="folder containing data.yaml, kinematics.yaml, metadata.yaml and the uncertainties file",
    )
    parser.add_argument(
        "--uncertainties-file",
        default="uncertainties_legacy_PTY.yaml",
        help="name of the uncertainties yaml file inside the commondata folder",
    )
    parser.add_argument(
        "--name-dataset",
        default="CMS_1JET_8TEV",
        help="dataset name used in the output file names",
    )
    args = parser.parse_args()

    commondata_dir = pathlib.Path(args.commondata_dir)
    # paths are passed as plain strings, as in the original invocation
    path_unc_file = str(commondata_dir / args.uncertainties_file)
    path_data_yaml = str(commondata_dir / "data.yaml")
    path_kin = str(commondata_dir / "kinematics.yaml")
    path_metadata = str(commondata_dir / "metadata.yaml")

    uncertainty_yaml_to_systype(path_unc_file, name_dataset=args.name_dataset)
    convert_new_data_to_old(path_data_yaml, path_unc_file, path_kin, path_metadata, name_dataset=args.name_dataset, path_DATA=None)