From c5ba01cdc83ed95d074aad5d476b92ff5affd956 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Sun, 26 May 2024 10:29:08 +0800 Subject: [PATCH] feat: return physionet_2019 as two subsets; --- tsdb/loading_funcs/physionet_2019.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tsdb/loading_funcs/physionet_2019.py b/tsdb/loading_funcs/physionet_2019.py index 8e75273..df2fd9e 100644 --- a/tsdb/loading_funcs/physionet_2019.py +++ b/tsdb/loading_funcs/physionet_2019.py @@ -16,11 +16,12 @@ def load_physionet2019(local_path): time_series_measurements_dir = ["training", "training_setB"] - label_feature = "SepsisLabel" # feature SepsisLabel contains labels indicating whether patients get sepsis - time_feature = "ICULOS" # ICU length-of-stay (hours since ICU admit) + # label_feature = "SepsisLabel" # feature SepsisLabel contains labels indicating whether patients get sepsis + # time_feature = "ICULOS" # ICU length-of-stay (hours since ICU admit) - df_collector = [] + set_collector = [] for m_ in time_series_measurements_dir: + df_collector = [] raw_data_dir = os.path.join(local_path, m_) for filename in os.listdir(raw_data_dir): recordID = filename.split(".psv")[0] @@ -28,11 +29,12 @@ def load_physionet2019(local_path): df_temp = pd.read_csv(f, sep="|", header=0) df_temp["RecordID"] = recordID df_collector.append(df_temp) - - df = pd.concat(df_collector, sort=True) - df = df.reset_index(drop=True) - y = df[["RecordID", time_feature, label_feature]] - X = df.drop(label_feature, axis=1) - - data = {"X": X, "y": y, "static_features": ["Age", "Gender", "HospAdmTime"]} + df = pd.concat(df_collector, sort=True) + set_collector.append(df) + + data = { + "training_setA": set_collector[0], + "training_setB": set_collector[1], + "static_features": ["Age", "Gender", "Unit1", "Unit2", "HospAdmTime"], + } return data