-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataset.py
73 lines (59 loc) · 1.91 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
import numpy as np
from numpy.lib import recfunctions as np_rfn
import os.path
from config import CFG
def load_and_prepare_data(pathfilenames):
    """Loads the data file(s), renames fields and applies diffuse dataset cuts.

    Parameters
    ----------
    pathfilenames : str | sequence of str
        The file name(s), including path(s), of the monte-carlo data file(s).

    Returns
    -------
    data : numpy record ndarray
        Loaded and prepared monte-carlo data. If several files are given,
        their contents are concatenated in the given order.

    Raises
    ------
    IndexError
        If an empty sequence of file names is given.
    RuntimeError
        If one of the given files does not exist.
    """
    # ``basestring`` only exists on Python 2. Fall back to ``str`` so that a
    # single file name is recognized on Python 3 as well.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str
    if isinstance(pathfilenames, string_types):
        pathfilenames = [pathfilenames]

    if len(pathfilenames) == 0:
        # Keep the exception type of the former ``pathfilenames[0]`` lookup,
        # but tell the caller what actually went wrong.
        raise IndexError('No data file names were given!')

    # Check and load the files one after the other, in the given order.
    arrays = []
    for pathfilename in pathfilenames:
        assert_file_exists(pathfilename)
        arrays.append(np.load(pathfilename))

    if len(arrays) == 1:
        data = arrays[0]
    else:
        # Join all files with a single copy instead of re-copying the
        # accumulated data for every additional file. The arrays are
        # flattened first, as np.append() without an axis would do.
        data = np.concatenate([np.ravel(arr) for arr in arrays])

    # Rename fields based on MC_keys dictionary.
    data = np_rfn.rename_fields(data, CFG['MC_keys'])

    # Apply diffuse dataset cuts.
    data = diffuse_cuts(data)

    return data
def diffuse_cuts(mc):
    """Applies diffuse dataset cuts on a given monte-carlo data.

    An entry is kept only if all of the following conditions hold:

    - its 'true_dec' value is greater than -5 degrees, expressed in radians,
    - the base-10 logarithm of its 'true_energy' value is less than 8.0,
    - its 'sigmaok' value equals 0.

    Parameters
    ----------
    mc : numpy record ndarray
        Monte-carlo data. It must provide the fields 'true_dec',
        'true_energy' and 'sigmaok'.

    Returns
    -------
    mc_dc : numpy record ndarray
        Monte-carlo data after diffuse dataset cuts.
    """
    # Compute the logarithm once and reuse it in the selection mask.
    log_true_e = np.log10(mc['true_energy'])

    mask = ((mc['true_dec'] > np.radians(-5)) &
            (log_true_e < 8.0) &
            (mc['sigmaok'] == 0))
    mc_dc = mc[mask]

    return mc_dc
def assert_file_exists(pathfilename):
    """Makes sure that the given path refers to an existing regular file.

    Parameters
    ----------
    pathfilename : str
        The file name, including path.

    Raises
    ------
    RuntimeError
        If the file does not exist.
    """
    # Nothing to do when the file is there.
    if os.path.isfile(pathfilename):
        return

    message = 'The data file "%s" does not exist!' % pathfilename
    raise RuntimeError(message)