-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bp_project: setup bipolar patient representations project (#882)
Initial setup of the dynamic patient representations project for investigating subtypes in patients with bipolar disorder. The initial implementation includes: - [x] Cohort definition - [x] Script for generating prediction timestamps
- Loading branch information
Showing
8 changed files
with
254 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
98 changes: 98 additions & 0 deletions
98
psycop/projects/bipolar/cohort_definition/bipolar_cohort_definition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import pandas as pd | ||
import polars as pl | ||
from tqdm import tqdm | ||
|
||
from psycop.common.cohort_definition import ( | ||
CohortDefiner, | ||
FilteredPredictionTimeBundle, | ||
filter_prediction_times, | ||
) | ||
from psycop.common.feature_generation.loaders.raw.load_visits import ( | ||
get_time_of_first_visit_to_psychiatry, | ||
) | ||
from psycop.projects.bipolar.cohort_definition.diagnosis_specification.first_bipolar_diagnosis import ( | ||
get_first_bipolar_diagnosis, | ||
) | ||
from psycop.projects.bipolar.cohort_definition.eligible_data.single_filters import ( | ||
BipolarMinAgeFilter, | ||
BipolarMinDateFilter, | ||
BipolarPatientsWithF20F25Filter, | ||
BipolarWashoutMove, | ||
) | ||
|
||
|
||
def generate_timestamps(
    first_visit_date: pd.Timestamp, diagnosis_date: pd.Timestamp, interval_days: int = 30
) -> list:  # type: ignore
    """Return evenly spaced timestamps stepping back from the diagnosis date.

    Starting at ``diagnosis_date``, steps of ``interval_days`` are taken
    backwards for as long as the resulting timestamp stays strictly after
    ``first_visit_date``. The diagnosis date itself is always included.

    Args:
        first_visit_date: Lower bound; no generated step lands on or before it.
        diagnosis_date: Anchor and last element of the returned list.
        interval_days: Number of days between consecutive timestamps.
            Must be positive.

    Returns:
        Timestamps in ascending order, ending with ``diagnosis_date``.

    Raises:
        ValueError: If ``interval_days`` is not positive. A zero or negative
            step never moves towards ``first_visit_date``, so the loop would
            not terminate.
    """
    if interval_days <= 0:
        raise ValueError(f"interval_days must be positive, got {interval_days}")

    step = pd.Timedelta(days=interval_days)
    timestamps = [diagnosis_date]
    current_date = diagnosis_date
    # Only step back while the next timestamp remains after the first visit.
    while current_date > first_visit_date + step:
        current_date -= step
        timestamps.append(current_date)

    # Collected newest-first; callers get them oldest-first.
    return timestamps[::-1]
|
||
|
||
class BipolarCohortDefiner(CohortDefiner):
    """Define the bipolar cohort and its retrospective prediction times."""

    @staticmethod
    def get_bipolar_cohort(interval_days: int = 30) -> FilteredPredictionTimeBundle:
        """Build prediction times stepping back from each first bipolar diagnosis.

        The first bipolar diagnosis per patient is passed through the
        eligibility filters, joined with the patient's first visit to
        psychiatry, and expanded by ``generate_timestamps`` into one row per
        prediction time.

        Args:
            interval_days: Spacing in days between consecutive prediction
                times; forwarded to ``generate_timestamps``.

        Returns:
            The bundle returned by ``filter_prediction_times``, with its
            ``prediction_times.frame`` replaced by a pandas DataFrame holding
            the columns ``patient_id`` and ``timestamp``.
        """
        first_diagnoses = pl.from_pandas(get_first_bipolar_diagnosis())

        bundle = filter_prediction_times(
            prediction_times=first_diagnoses.lazy(),
            filtering_steps=(
                BipolarMinDateFilter(),
                BipolarMinAgeFilter(),
                BipolarWashoutMove(),
                BipolarPatientsWithF20F25Filter(),
            ),
            entity_id_col_name="dw_ek_borger",
        )

        diagnoses_df = pd.DataFrame(bundle.prediction_times.frame.to_pandas())
        first_visits_df = pd.DataFrame(get_time_of_first_visit_to_psychiatry().to_pandas())

        # The overlapping "timestamp" column from the visits frame is renamed
        # to "timestamp_first_visit"; the diagnosis timestamp keeps its name.
        diagnoses_df = diagnoses_df.merge(
            first_visits_df,
            on="dw_ek_borger",
            how="left",
            suffixes=(None, "_first_visit"),
        )

        # Drop patients without a first visit, and diagnoses that precede it.
        diagnoses_df = diagnoses_df.dropna(subset=["timestamp_first_visit"])
        diagnoses_df = diagnoses_df[
            diagnoses_df["timestamp"] >= diagnoses_df["timestamp_first_visit"]
        ]

        # Only these three columns feed the expansion below.
        visit_and_diagnosis = diagnoses_df[
            ["dw_ek_borger", "timestamp_first_visit", "timestamp"]
        ]

        timestamps_per_patient = []
        # `total` is given explicitly because the iterator has no length.
        for patient_id, first_visit, diagnosis in tqdm(
            visit_and_diagnosis.itertuples(index=False, name=None),
            total=len(visit_and_diagnosis),
        ):
            timestamps_per_patient.extend(
                (patient_id, timestamp)
                for timestamp in generate_timestamps(
                    first_visit, diagnosis, interval_days=interval_days
                )
            )

        # NOTE(review): the id column is emitted as "patient_id" although the
        # bundle was filtered with entity_id_col_name="dw_ek_borger", and the
        # frame is replaced by a pandas DataFrame (hence the type: ignore).
        # Confirm that downstream consumers expect this.
        result_df = pd.DataFrame(timestamps_per_patient, columns=["patient_id", "timestamp"])

        bundle.prediction_times.frame = result_df  # type: ignore

        return bundle
|
||
|
||
if __name__ == "__main__":
    # Build the cohort with the default interval when run as a script.
    cohort_bundle = BipolarCohortDefiner.get_bipolar_cohort()
16 changes: 16 additions & 0 deletions
16
psycop/projects/bipolar/cohort_definition/diagnosis_specification/first_bipolar_diagnosis.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import pandas as pd | ||
import polars as pl | ||
|
||
from psycop.common.feature_generation.loaders.raw.load_diagnoses import bipolar_a_diagnosis | ||
|
||
|
||
def get_first_bipolar_diagnosis() -> pd.DataFrame:
    """Return one bipolar diagnosis row per patient as a pandas DataFrame.

    The diagnoses loaded by ``bipolar_a_diagnosis`` are sorted by
    ``timestamp`` and the first row of each ``dw_ek_borger`` group is kept.

    Returns:
        A DataFrame with the columns ``dw_ek_borger`` and ``timestamp``.
    """
    all_bipolar = pl.DataFrame(bipolar_a_diagnosis())

    chronological = all_bipolar.sort("timestamp")
    first_per_patient = chronological.groupby("dw_ek_borger").first()

    as_pandas = first_per_patient.to_pandas()
    return as_pandas[["dw_ek_borger", "timestamp"]]
|
||
|
||
if __name__ == "__main__":
    # Load the first bipolar diagnoses when run as a script.
    first_diagnoses = get_first_bipolar_diagnosis()
Empty file.
18 changes: 18 additions & 0 deletions
18
psycop/projects/bipolar/cohort_definition/eligible_data/add_age.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import polars as pl | ||
|
||
from psycop.common.feature_generation.loaders.raw.load_demographic import birthdays | ||
from psycop.projects.forced_admission_inpatient.cohort.prediction_timestamp_filters.eligible_config import ( | ||
AGE_COL_NAME, | ||
) | ||
|
||
|
||
def add_age(df: pl.DataFrame) -> pl.DataFrame:
    """Attach each row's age in years at ``timestamp``.

    Birthdays are inner-joined on ``dw_ek_borger``, so rows without a
    matching birthday are dropped. Age is the whole number of days between
    ``date_of_birth`` and ``timestamp``, divided by 365.25.

    Args:
        df: Frame with the columns ``dw_ek_borger`` and ``timestamp``.

    Returns:
        The joined frame with the age stored under ``AGE_COL_NAME``.
    """
    # NOTE(review): AGE_COL_NAME is imported from the
    # forced_admission_inpatient config, not the bipolar eligible_config.
    # Confirm that this is intended.
    with_birthdays = df.join(pl.from_pandas(birthdays()), on="dw_ek_borger", how="inner")

    days_lived = (pl.col("timestamp") - pl.col("date_of_birth")).dt.days()
    return with_birthdays.with_columns((days_lived / 365.25).alias(AGE_COL_NAME))
5 changes: 5 additions & 0 deletions
5
psycop/projects/bipolar/cohort_definition/eligible_data/eligible_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from datetime import datetime | ||
|
||
# Name of the column that holds the patient's age.
AGE_COL_NAME = "age"
# Lower age bound used by the cohort's age filter.
MIN_AGE = 18
# Date bound used by the cohort's date filter.
MIN_DATE = datetime(2013, 1, 1)
73 changes: 73 additions & 0 deletions
73
psycop/projects/bipolar/cohort_definition/eligible_data/single_filters.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import pandas as pd | ||
import polars as pl | ||
|
||
from psycop.common.cohort_definition import PredictionTimeFilter | ||
from psycop.common.feature_generation.loaders.raw.load_diagnoses import ( | ||
schizoaffective, | ||
schizophrenia, | ||
) | ||
from psycop.common.feature_generation.loaders.raw.load_moves import MoveIntoRMBaselineLoader | ||
from psycop.common.model_training_v2.trainer.preprocessing.steps.row_filter_other import ( | ||
QuarantineFilter, | ||
) | ||
from psycop.projects.bipolar.cohort_definition.eligible_data.add_age import add_age | ||
from psycop.projects.bipolar.cohort_definition.eligible_data.eligible_config import ( | ||
AGE_COL_NAME, | ||
MIN_AGE, | ||
MIN_DATE, | ||
) | ||
|
||
|
||
class BipolarMinDateFilter(PredictionTimeFilter):
    """Keep only rows whose ``timestamp`` is strictly after ``MIN_DATE``."""

    def apply(self, df: pl.LazyFrame) -> pl.LazyFrame:
        is_after_min_date = pl.col("timestamp") > MIN_DATE
        return df.filter(is_after_min_date)
|
||
|
||
class BipolarMinAgeFilter(PredictionTimeFilter):
    """Keep only rows where the age at ``timestamp`` is at least ``MIN_AGE``."""

    def apply(self, df: pl.LazyFrame) -> pl.LazyFrame:
        # add_age takes an eager frame, so collect first and go lazy again after.
        with_age = add_age(df.collect()).lazy()
        is_old_enough = pl.col(AGE_COL_NAME) >= MIN_AGE
        return with_age.filter(is_old_enough)
|
||
|
||
class BipolarWashoutMove(PredictionTimeFilter):
    """Apply a 730-day ``QuarantineFilter`` driven by ``MoveIntoRMBaselineLoader``.

    NOTE(review): presumably this drops prediction times within two years of
    a move into the region; confirm against ``QuarantineFilter``.
    """

    def apply(self, df: pl.LazyFrame) -> pl.LazyFrame:
        washout_filter = QuarantineFilter(
            entity_id_col_name="dw_ek_borger",
            quarantine_timestamps_loader=MoveIntoRMBaselineLoader(),
            quarantine_interval_days=730,
            timestamp_col_name="timestamp",
        )
        return washout_filter.apply(df)
|
||
|
||
class BipolarPatientsWithF20F25Filter(PredictionTimeFilter):
    """Drop patients with an F20/F25 diagnosis at or after their bipolar timestamp.

    A patient is removed entirely when any row from ``schizophrenia()`` or
    ``schizoaffective()`` has a timestamp on or after the patient's
    timestamp in the input frame.
    """

    def apply(self, df: pl.LazyFrame) -> pl.LazyFrame:
        # Suffix -> diagnosis frame; the suffix names the merged timestamp column.
        exclusion_diagnoses = {"_f20": schizophrenia(), "_f25": schizoaffective()}
        bipolar_pd = pd.DataFrame(df.collect().to_pandas())

        excluded_ids = set()
        for suffix, diagnosis_df in exclusion_diagnoses.items():
            merged = pd.merge(
                bipolar_pd, diagnosis_df, on="dw_ek_borger", how="left", suffixes=("_df", suffix)
            )
            # Rows without a matching diagnosis have a missing timestamp and
            # compare as False, so they are never selected.
            diagnosed_at_or_after = merged["timestamp_df"] <= merged[f"timestamp{suffix}"]
            excluded_ids.update(merged[diagnosed_at_or_after].dw_ek_borger.unique())

        kept_pd = bipolar_pd[~bipolar_pd["dw_ek_borger"].isin(excluded_ids)]

        return pl.DataFrame(kept_pd).lazy()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters