Skip to content

Commit

Permalink
Merge pull request #1224 from cta-observatory/update_dl1_to_dl2_script
Browse files Browse the repository at this point in the history
Update dl1 to dl2 script (less required memory and fewer required steps)
  • Loading branch information
vuillaut authored Feb 27, 2024
2 parents a453522 + 24ecab8 commit fdb1a65
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 19 deletions.
17 changes: 10 additions & 7 deletions lstchain/reco/dl1_to_dl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,15 +649,16 @@ def apply_models(dl1,
elif config['disp_method'] == 'disp_norm_sign':
if isinstance(reg_disp_norm, (str, bytes, Path)):
reg_disp_norm = joblib.load(reg_disp_norm)
disp_norm = reg_disp_norm.predict(dl2[disp_regression_features])
del reg_disp_norm

if isinstance(cls_disp_sign, (str, bytes, Path)):
cls_disp_sign = joblib.load(cls_disp_sign)
disp_norm = reg_disp_norm.predict(dl2[disp_regression_features])
disp_sign_proba = cls_disp_sign.predict_proba(dl2[disp_classification_features])
col = list(cls_disp_sign.classes_).index(1)
disp_sign = np.where(disp_sign_proba[:, col] > 0.5, 1, -1)

del reg_disp_norm
del cls_disp_sign

dl2['reco_disp_norm'] = disp_norm
dl2['reco_disp_sign'] = disp_sign
dl2['reco_disp_sign_proba'] = disp_sign_proba[:, 0]
Expand Down Expand Up @@ -719,18 +720,20 @@ def apply_models(dl1,

if isinstance(classifier, (str, bytes, Path)):
classifier = joblib.load(classifier)
dl2['reco_type'] = classifier.predict(dl2[classification_features]).astype(int)
probs = classifier.predict_proba(dl2[classification_features])
del classifier

# This check is valid as long as we train on only two classes (gammas and protons)
if probs.shape[1] > 2:
raise ValueError("The classifier is predicting more than two classes, "
"the predicted probabilty to assign as gammaness is unclear."
"Please check training data")

# gammaness is the prediction probability for the first class (0)
dl2['gammaness'] = probs[:, 0]
# gammaness is the prediction probability for the class 0 (proton: class 101)
mc_type_gamma, mc_type_proton = 0, 101
col = list(classifier.classes_).index(mc_type_gamma)
dl2['gammaness'] = probs[:, col]
dl2['reco_type'] = np.where(probs[:, col] > 0.5, mc_type_gamma, mc_type_proton)
del classifier

return dl2

Expand Down
33 changes: 21 additions & 12 deletions lstchain/scripts/lstchain_dl1_to_dl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,15 @@ def apply_to_file(filename, models_dict, output_dir, config):
dl2 = dl1_to_dl2.apply_models(data,
models_dict['cls_gh'],
models_dict['reg_energy'],
reg_disp_vector=models_dict['disp_vector'],
reg_disp_vector=models_dict['reg_disp_vector'],
effective_focal_length=effective_focal_length,
custom_config=config)
elif config['disp_method'] == 'disp_norm_sign':
dl2 = dl1_to_dl2.apply_models(data,
models_dict['cls_gh'],
models_dict['reg_energy'],
reg_disp_norm=models_dict['disp_norm'],
cls_disp_sign=models_dict['disp_sign'],
reg_disp_norm=models_dict['reg_disp_norm'],
cls_disp_sign=models_dict['cls_disp_sign'],
effective_focal_length=effective_focal_length,
custom_config=config)

Expand Down Expand Up @@ -173,15 +173,15 @@ def apply_to_file(filename, models_dict, output_dir, config):
dl2 = dl1_to_dl2.apply_models(data_with_srcdep_param,
models_dict['cls_gh'],
models_dict['reg_energy'],
reg_disp_vector=models_dict['disp_vector'],
reg_disp_vector=models_dict['reg_disp_vector'],
effective_focal_length=effective_focal_length,
custom_config=config)
elif config['disp_method'] == 'disp_norm_sign':
dl2 = dl1_to_dl2.apply_models(data_with_srcdep_param,
models_dict['cls_gh'],
models_dict['reg_energy'],
reg_disp_norm=models_dict['disp_norm'],
cls_disp_sign=models_dict['disp_sign'],
reg_disp_norm=models_dict['reg_disp_norm'],
cls_disp_sign=models_dict['cls_disp_sign'],
effective_focal_length=effective_focal_length,
custom_config=config)

Expand Down Expand Up @@ -266,14 +266,23 @@ def main():

config = replace_config(standard_config, custom_config)

# load models once and for all
models_dict = {'reg_energy': joblib.load(Path(args.path_models, 'reg_energy.sav'))}
models_dict['cls_gh'] = joblib.load(Path(args.path_models, 'cls_gh.sav'))
models_keys = ['reg_energy', 'cls_gh']

if config['disp_method'] == 'disp_vector':
models_dict['disp_vector'] = joblib.load(Path(args.path_models, 'reg_disp_vector.sav'))
models_keys.append('reg_disp_vector')
elif config['disp_method'] == 'disp_norm_sign':
models_dict['disp_norm'] = joblib.load(Path(args.path_models, 'reg_disp_norm.sav'))
models_dict['disp_sign'] = joblib.load(Path(args.path_models, 'cls_disp_sign.sav'))
models_keys.extend(['reg_disp_norm', 'cls_disp_sign'])

models_dict = {}
for models_key in models_keys:
models_path = Path(args.path_models, f'{models_key}.sav')

# For a single input file, each model is loaded just before it is used
if len(args.input_files)==1:
models_dict[models_key] = models_path
# For multiple input files, all the models are loaded only once here
else:
models_dict[models_key] = joblib.load(models_path)

for filename in args.input_files:
apply_to_file(filename, models_dict, args.output_dir, config)
Expand Down

0 comments on commit fdb1a65

Please sign in to comment.