Add PublicBenchmarkDataset & SecretDataset #747

Closed
wants to merge 34 commits

Commits (34 total; changes shown from 16 commits)
6a06d65
revert changes on main
RasmusOrsoe May 20, 2024
534a529
adjust download logic
RasmusOrsoe Aug 28, 2024
fbafb46
Merge branch 'main' of https://github.com/RasmusOrsoe/graphnet
RasmusOrsoe Sep 3, 2024
cbc4228
Merge branch 'main' of https://github.com/RasmusOrsoe/graphnet
RasmusOrsoe Sep 10, 2024
810f6c7
add merging functionality to graph_definition
RasmusOrsoe Sep 11, 2024
76c8b83
generalize temp ids to xyz
RasmusOrsoe Sep 11, 2024
b9cf465
reference time column in Detector
RasmusOrsoe Sep 11, 2024
7d487f4
add `sensor_time_name` as `Detector` property
RasmusOrsoe Sep 11, 2024
6779ee0
add `sensor_time_column` to all Detectors
RasmusOrsoe Sep 11, 2024
e9e3a68
pass new args through specific graph implementations
RasmusOrsoe Sep 11, 2024
6f993ce
add `charge_name` as Detector property
RasmusOrsoe Sep 11, 2024
fac18e6
add `charge_column` to all Detectors
RasmusOrsoe Sep 11, 2024
72de10e
add member variable for charge in graph def
RasmusOrsoe Sep 11, 2024
8227d74
add unit test for merging functionality
RasmusOrsoe Sep 11, 2024
6c5cf10
remove stray print statement
RasmusOrsoe Sep 11, 2024
2899067
adjust logic for path finding
RasmusOrsoe Sep 12, 2024
c521d27
grab chunk ids instead of inferring them in ParquetDataset _get_all_in…
RasmusOrsoe Sep 12, 2024
a386817
remove non-existing ids from indices in parquet_dataset
RasmusOrsoe Sep 12, 2024
3593366
adjust pathing for secret dataset
RasmusOrsoe Sep 13, 2024
074ebdb
add z flag for extraction with tar for speedup
RasmusOrsoe Sep 13, 2024
b683831
toggle z-flag off for tar extraction for parquet backend
RasmusOrsoe Sep 13, 2024
50f9a35
add PublicBenchmarkDataset and SecretDataset
RasmusOrsoe Sep 13, 2024
bf3fc6e
add imports to init
RasmusOrsoe Sep 13, 2024
3c3b962
adjust doc string
RasmusOrsoe Sep 13, 2024
40aee1f
black
RasmusOrsoe Sep 13, 2024
2c1d202
overwrite previous changes to DataConverter
RasmusOrsoe Sep 13, 2024
ea39d1c
fix _get_all_indices_ in parquetdataset
RasmusOrsoe Sep 13, 2024
f35a04e
remove changes to DataConverter
RasmusOrsoe Sep 13, 2024
2f778a9
remove unintended comment
RasmusOrsoe Sep 13, 2024
17d3d44
cast list to str
RasmusOrsoe Sep 14, 2024
4012e77
Only infer train/val selection in DataModule if test selection is not…
RasmusOrsoe Sep 14, 2024
0db2229
grammar
RasmusOrsoe Sep 14, 2024
31b99c5
Merge pull request #30 from RasmusOrsoe/pulse_merging_graph_definition
RasmusOrsoe Sep 14, 2024
03c5935
Merge branch 'paper-test-branch' into new_dataset
RasmusOrsoe Sep 14, 2024
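
Several of the commits above ("add merging functionality to graph_definition", "generalize temp ids to xyz", "add `sensor_time_name` as `Detector` property", "add `charge_column` to all Detectors") point at pulse merging keyed on sensor position, time and charge. The snippet below is only a conceptual sketch of that kind of merge, written against pandas with made-up column names; it is not the graph_definition implementation from this PR.

import pandas as pd

# Conceptual sketch of pulse merging, NOT the PR's graph_definition code.
# Column names ("x", "y", "z", "t", "charge") are illustrative stand-ins.
pulses = pd.DataFrame(
    {
        "x": [0.0, 0.0, 10.0],
        "y": [0.0, 0.0, 5.0],
        "z": [0.0, 0.0, -2.0],
        "t": [12.0, 15.0, 20.0],
        "charge": [1.2, 0.8, 2.0],
    }
)

# Merge pulses recorded on the same sensor (identified by its xyz position):
# keep the earliest time and sum the charge.
merged = (
    pulses.groupby(["x", "y", "z"], as_index=False)
    .agg({"t": "min", "charge": "sum"})
)
print(merged)
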
src/graphnet/data/curated_datamodule.py (10 additions, 0 deletions)
@@ -285,7 +285,17 @@ def prepare_data(self) -> None:
            return
        else:
            # Download, unzip and delete zipped file

            os.makedirs(self.dataset_dir, exist_ok=True)

            os.makedirs(self.dataset_dir)
            _, file_name = os.path.split(file_hash)
            extension = ".tar.gz" if ".tar.gz" not in file_name else ""
            file_path = os.path.join(
                self.dataset_dir,
                file_name + extension,
            )

            os.system(f"wget -O {file_path} {self._mirror}/{file_hash}")
            print("Unzipping file, this might take a while..")
            if self._backend == "parquet":
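
For context, the download step above builds a local archive path from the ERDA file hash and shells out to `wget` (and, per the commit messages, later to `tar` with the `z` flag toggled per backend). Below is a rough pure-Python sketch of the same flow; `mirror_url`, `file_hash`, and `dataset_dir` are stand-in names, and this is an illustration of the logic rather than the PR's implementation.

import os
import tarfile
import urllib.request


def download_and_extract(mirror_url: str, file_hash: str, dataset_dir: str) -> None:
    """Illustrative sketch: fetch an archive by hash and unpack it."""
    os.makedirs(dataset_dir, exist_ok=True)
    file_name = os.path.basename(file_hash)
    if not file_name.endswith(".tar.gz"):
        file_name += ".tar.gz"
    archive_path = os.path.join(dataset_dir, file_name)

    # Download the archive from the mirror (the PR uses `wget` via os.system).
    urllib.request.urlretrieve(f"{mirror_url}/{file_hash}", archive_path)

    # Extract; "r:gz" corresponds to tar's `z` flag, "r:" to a plain tar archive.
    mode = "r:gz" if archive_path.endswith(".tar.gz") else "r:"
    with tarfile.open(archive_path, mode) as tar:
        tar.extractall(path=dataset_dir)

    # Delete the archive afterwards, matching the
    # "# Download, unzip and delete zipped file" comment in the diff.
    os.remove(archive_path)
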
src/graphnet/models/detector/detector.py (11 additions, 1 deletion)
@@ -33,7 +33,7 @@ def forward( # type: ignore
    @property
    def geometry_table(self) -> pd.DataFrame:
        """Public get method for retrieving a `Detector`s geometry table."""
        if ~hasattr(self, "_geometry_table"):
        if not hasattr(self, "_geometry_table"):
            try:
                assert hasattr(self, "geometry_table_path")
            except AssertionError as e:
@@ -60,6 +60,16 @@ def sensor_index_name(self) -> str:
        """Public get method for retrieving the sensor id column name."""
        return self.sensor_id_column

    @property
    def sensor_time_name(self) -> str:
        """Public get method for retrieving the sensor time column name."""
        return self.sensor_time_column

    @property
    def charge_name(self) -> str:
        """Public get method for retrieving the charge column name."""
        return self.charge_column

    @final
    def _standardize(
        self, input_features: torch.tensor, input_feature_names: List[str]
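
To see how the new `sensor_time_name` and `charge_name` properties are meant to be consumed, here is a minimal, self-contained sketch of the same property pattern. `BaseDetector` and `MyDetector` are illustrative stand-ins rather than graphnet classes; downstream code would read the column names through the public properties rather than the class attributes.

# Minimal, self-contained sketch of the property pattern added above.
# `BaseDetector`/`MyDetector` are hypothetical, not graphnet classes.


class BaseDetector:
    """Mimics the column-name properties introduced in `Detector`."""

    sensor_time_column: str
    charge_column: str

    @property
    def sensor_time_name(self) -> str:
        """Public get method for retrieving the sensor time column name."""
        return self.sensor_time_column

    @property
    def charge_name(self) -> str:
        """Public get method for retrieving the charge column name."""
        return self.charge_column


class MyDetector(BaseDetector):
    """Hypothetical detector declaring its per-pulse column names."""

    sensor_time_column = "dom_time"
    charge_column = "charge"


detector = MyDetector()
print(detector.sensor_time_name)  # -> "dom_time"
print(detector.charge_name)       # -> "charge"
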
src/graphnet/models/detector/icecube.py (6 additions, 0 deletions)
@@ -17,6 +17,8 @@ class IceCube86(Detector):
    xyz = ["dom_x", "dom_y", "dom_z"]
    string_id_column = "string"
    sensor_id_column = "sensor_id"
    sensor_time_column = "dom_time"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension of input data."""
@@ -57,6 +59,8 @@ class IceCubeKaggle(Detector):
    xyz = ["x", "y", "z"]
    string_id_column = "string"
    sensor_id_column = "sensor_id"
    sensor_time_column = "time"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension of input data."""
@@ -122,6 +126,8 @@ class IceCubeUpgrade(Detector):
    xyz = ["dom_x", "dom_y", "dom_z"]
    string_id_column = "string"
    sensor_id_column = "sensor_id"
    sensor_time_column = "dom_time"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension of input data."""
src/graphnet/models/detector/liquido.py (2 additions, 0 deletions)
@@ -17,6 +17,8 @@ class LiquidO_v1(Detector):
    xyz = ["sipm_x", "sipm_y", "sipm_z"]
    string_id_column = "fiber_id"
    sensor_id_column = "sipm_id"
    sensor_time_column = "t"
    charge_column = None

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
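
Note that `LiquidO_v1` sets `charge_column = None`, so anything reading `charge_name` has to tolerate a missing charge feature. A hedged sketch of such a guard is shown below; `detector` and `feature_names` are hypothetical stand-ins, not code from this PR.

# Illustrative guard for detectors without a charge column (charge_column = None).
# `detector` and `feature_names` are hypothetical; this is not code from the PR.
def select_charge_feature(detector, feature_names):
    """Return the charge column name if the detector defines one, else None."""
    charge = detector.charge_name  # property added in this PR
    if charge is None or charge not in feature_names:
        # No charge information available; caller should skip charge-based logic.
        return None
    return charge
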
src/graphnet/models/detector/prometheus.py (24 additions, 0 deletions)
@@ -17,6 +17,8 @@ class ORCA150SuperDense(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -47,6 +49,8 @@ class TRIDENT1211(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -77,6 +81,8 @@ class IceCubeUpgrade7(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -107,6 +113,8 @@ class WaterDemo81(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -137,6 +145,8 @@ class BaikalGVD8(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -167,6 +177,8 @@ class IceDemo81(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -197,6 +209,8 @@ class ARCA115(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -227,6 +241,8 @@ class ORCA150(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -257,6 +273,8 @@ class IceCube86Prometheus(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -287,6 +305,8 @@ class IceCubeDeepCore8(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -317,6 +337,8 @@ class IceCubeGen2(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""
@@ -344,6 +366,8 @@ class PONETriangle(Detector):
    xyz = ["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]
    string_id_column = "sensor_string_id"
    sensor_id_column = "sensor_id"
    sensor_time_column = "t"
    charge_column = "charge"

    def feature_map(self) -> Dict[str, Callable]:
        """Map standardization functions to each dimension."""