This repository has been archived by the owner on May 1, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #348 from Aarhus-Psychiatry-Research/marbern/refac…
…tor_load Marbern/refactor load
- Loading branch information
Showing
118 changed files
with
1,875 additions
and
3,232 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# @package _global_ | ||
data: | ||
# General config | ||
n_training_samples: null | ||
dir: E:\shared_resources\t2d\feature_sets\psycop_t2d_adminmanber_features_2022_12_19_15_36 | ||
suffix: parquet | ||
|
||
# Feature specs | ||
pred_prefix: pred_ | ||
outc_prefix: outc_ | ||
|
||
col_name: | ||
pred_timestamp: timestamp | ||
outcome_timestamp: timestamp_first_t2d_hba1c | ||
id: dw_ek_borger | ||
age: pred_age_in_years | ||
exclusion_timestamp: timestamp_exclusion | ||
custom: | ||
n_hba1c: eval_hba1c_within_9999_days_count_fallback_nan |
2 changes: 1 addition & 1 deletion
2
...model_training/config/default_config.yaml → application/config/default_config.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
30 changes: 30 additions & 0 deletions
30
application/config/preprocessing/default_preprocessing.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# @package _global_ | ||
preprocessing: | ||
pre_split: | ||
convert_to_boolean: false | ||
convert_booleans_to_int: true | ||
drop_datetime_predictor_columns: true | ||
convert_datetimes_to_ordinal: false | ||
drop_patient_if_exclusion_before_date: 2013-01-01 | ||
min_prediction_time_date: 2013-01-01 | ||
min_lookahead_days: 1825 | ||
lookbehind_combination: [30, 90, 180, 365, 730] | ||
min_age: 18 | ||
post_split: | ||
imputation_method: most_frequent | ||
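scaling: z-score-normalisation # NOTE(review): the sweeper choices below spell this "z-score-normalization" - confirm that both spellings are accepted | |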
feature_selection: | ||
name: chi2 | ||
params: | ||
percentile: 20 # (int): Percent of features to keep. Defaults to 10. | ||
|
||
# Parameters that will only take effect if running with --multirun | ||
hydra: | ||
sweeper: | ||
params: | ||
++preprocessing.post_split.imputation_method: choice("most_frequent", "mean", "median", "null") | ||
++preprocessing.post_split.scaling: choice("z-score-normalization", "null") | ||
++preprocessing.post_split.feature_selection.name: choice("chi2", "null") | ||
++preprocessing.post_split.feature_selection.params.percentile: int(tag(log, interval(1, 90))) | ||
++preprocessing.pre_split.lookbehind_combination: choice([30, 90, 180, 365, 730], [30, 180, 730], [730], [365], [90], [30]) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: t2d | ||
seed: 42 | ||
|
||
wandb: | ||
entity: psycop # Which entity to run WandB in. | |
mode: run # Which mode to run WandB in. Takes "run", "dryrun", "offline" and "disabled" | |
group: t2d # Which group to run WandB in. | |
|
||
gpu: true |
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"""Example of how to inspect a dataset using the configs.""" | ||
from psycop_model_training.data_loader.utils import ( | ||
load_and_filter_train_from_cfg, | ||
load_train_raw, | ||
) | ||
from psycop_model_training.utils.config_schemas import load_test_cfg_as_pydantic | ||
|
||
|
||
def main():
    """Load the raw and the filtered training data from the default config.

    Both frames are built from the same config object so they can be
    compared side by side when inspecting the dataset.
    """
    cfg = load_test_cfg_as_pydantic(config_file_name="default_config.yaml")

    # Neither frame is used further; they are only bound to names so they can
    # be inspected, hence the pylint suppressions.
    raw_train = load_train_raw(cfg=cfg)  # pylint: disable=unused-variable
    filtered_train = load_and_filter_train_from_cfg(cfg=cfg)  # pylint: disable=unused-variable
|
||
|
||
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import pandas as pd | ||
from psycopmlutils.sql.loader import sql_load | ||
|
||
|
||
def load_timestamp_for_any_diabetes():
    """Load wash-in timestamps for the broad definition of diabetes.

    The broad ("any diabetes") definition is the one used for wash-in; see
    the R files for details on how it is constructed.

    Returns:
        A frame with the ``dw_ek_borger`` column and the
        ``datotid_first_diabetes_any`` column renamed to ``timestamp_washin``.
    """
    # NOTE(review): the flag name suggests timestamp columns are left
    # unconverted by the loader - confirm against sql_load.
    first_diabetes = sql_load(
        query="SELECT * FROM [fct].[psycop_t2d_first_diabetes_any]",
        format_timestamp_cols_to_datetime=False,
    )

    # Keep only the id column and the timestamp column from the loaded table.
    id_and_timestamp = first_diabetes[["dw_ek_borger", "datotid_first_diabetes_any"]]

    return id_and_timestamp.rename(
        columns={"datotid_first_diabetes_any": "timestamp_washin"},
    )
|
||
|
||
def add_washin_timestamps(dataset: pd.DataFrame) -> pd.DataFrame:
    """Attach wash-in timestamps to ``dataset``.

    Wash-in is an exclusion criterion: a patient with any visit that looks
    like diabetes before the study starts (i.e. during wash-in) is excluded.
    This function only adds the timestamp information; it drops no rows.

    Args:
        dataset: Frame containing a ``dw_ek_borger`` column to join on.

    Returns:
        ``dataset`` left-joined on ``dw_ek_borger`` with the frame returned by
        ``load_timestamp_for_any_diabetes``. Rows without a match are kept.
    """
    return dataset.merge(
        load_timestamp_for_any_diabetes(),
        how="left",
        on="dw_ek_borger",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
58 changes: 0 additions & 58 deletions
58
...2d/outcome_specification - move to t2d-feature-gen-repo/00_generate_dfs/10_medication.rmd
This file was deleted.
Oops, something went wrong.
33 changes: 0 additions & 33 deletions
33
...ion/t2d/outcome_specification - move to t2d-feature-gen-repo/00_generate_dfs/20_hba1c.rmd
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.