Skip to content

Commit

Permalink
refactor DataHarmonizer
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer committed Oct 28, 2024
1 parent 543201c commit 2f65a45
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
4 changes: 3 additions & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def __init__(
         self._check_loader(loader=loader)

         # fill data from loader
-        self.rawinput: pd.DataFrame = DataHarmonizer(loader).get_rawinput()
+        self.rawinput: pd.DataFrame = DataHarmonizer(loader).get_harmonized_rawinput(
+            loader.rawinput
+        )
         self.filter_columns: List[str] = loader.filter_columns

         self.software: str = loader.software
Expand Down
18 changes: 9 additions & 9 deletions alphastats/dataset_harmonizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ class DataHarmonizer:
     """Harmonize input data to a common format."""

     def __init__(self, loader: BaseLoader):
-        self._rawinput = loader.rawinput
-        self._index_column = loader.index_column
+        self._rename_dict = {loader.index_column: Cols.INDEX}

-    def get_rawinput(self) -> pd.DataFrame:
+    def get_harmonized_rawinput(self, rawinput: pd.DataFrame) -> pd.DataFrame:
         """Harmonize the rawinput data to a common format."""
-        if Cols.INDEX in self._rawinput.columns:
-            raise ValueError(
-                f"Column name {Cols.INDEX} already exists in rawinput. Please rename the column."
-            )
+        for target_name in self._rename_dict.values():
+            if target_name in rawinput.columns:
+                raise ValueError(
+                    f"Column name {target_name} already exists in rawinput. Please rename the column."
+                )

-        return self._rawinput.rename(
-            columns={self._index_column: Cols.INDEX},
+        return rawinput.rename(
+            columns=self._rename_dict,
             errors="raise",
         )

0 comments on commit 2f65a45

Please sign in to comment.