diff --git a/.github/workflows/cache_version b/.github/workflows/cache_version
deleted file mode 100644
index afaf360d..00000000
--- a/.github/workflows/cache_version
+++ /dev/null
@@ -1 +0,0 @@
-1.0.0
\ No newline at end of file
diff --git a/.github/workflows/main_test_and_release.yml b/.github/workflows/main_test_and_release.yml
index 5edc5436..a683345b 100644
--- a/.github/workflows/main_test_and_release.yml
+++ b/.github/workflows/main_test_and_release.yml
@@ -9,10 +9,10 @@ on:
   pull_request:
   push:
     branches:
-      - main
+      - main
 
 env:
-  cache-version: 0.0.4
+  cache-version: 0.0.6
   poetry-version: 1.1.15
   python-version: 3.9 # Change this number if you want to manually invalidate all caches
 
@@ -26,7 +26,7 @@ jobs:
 
     # This allows a subsequently queued workflow run to interrupt previous runs
     concurrency:
-      group: '${{ github.workflow }} - ${{ matrix.os }} @ ${{ github.ref }}'
+      group: "${{ github.workflow }} - ${{ matrix.os }} @ ${{ github.ref }}"
       cancel-in-progress: true
 
     steps:
@@ -46,21 +46,21 @@
     needs: test
     if: ${{ github.ref == 'refs/heads/main' }}
     steps:
-    # Checkout action is required for token to persist
-    - uses: actions/checkout@v2
-      with:
-        fetch-depth: 0
-        token: ${{ secrets.RELEASE_BOT }}
+      # Checkout action is required for token to persist
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.RELEASE_BOT }}
 
-    - name: Python Semantic Release
-      uses: relekang/python-semantic-release@v7.32.0
-      with:
-        github_token: ${{ secrets.RELEASE_BOT }}
-        # Remember to copy the tool.semantic_release section from pyproject.toml
-        # as well
-        # To enable pypi,
-        # 1) Set upload_to_pypi to true in pyproject.toml and
-        # 2) Set the pypi_token in the repo
-        # 3) Uncomment the two lines below
-        repository_username: __token__
-        repository_password: ${{ secrets.PYPI_TOKEN }}
\ No newline at end of file
+      - name: Python Semantic Release
+        uses: relekang/python-semantic-release@v7.32.0
+        with:
+          github_token: ${{ secrets.RELEASE_BOT }}
+          # Remember to copy the tool.semantic_release section from pyproject.toml
+          # as well
+          # To enable pypi,
+          # 1) Set upload_to_pypi to true in pyproject.toml and
+          # 2) Set the pypi_token in the repo
+          # 3) Uncomment the two lines below
+          repository_username: __token__
+          repository_password: ${{ secrets.PYPI_TOKEN }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c540559..9d669bdf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -63,7 +63,7 @@
 * Reimplement ([`c99585f`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/c99585fdf9f9f407a69e0ead05f935d34ed86a63))
 * Use lru cache decorator for values_df loading ([`4006818`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/40068187da20854fcca980872bc42b8a3a096cc9))
 * Add support for loader kwargs ([`127f821`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/127f8215c35b792390595b890210baa0e8cf3591))
-* Move values_df resolution to anyspec object ([`714e83f`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/714e83fd3722b298cdd256b06915659ca7a34259))
+* Move values_df resolution to _AnySpec object ([`714e83f`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/714e83fd3722b298cdd256b06915659ca7a34259))
 * Make date of birth output prefix a param ([`0ed1198`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/0ed11982ba1b239e5650d23dbfab707100e38137))
 * Ensure that dfs are sorted and of same length before concat ([`84a4d65`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/84a4d65b731a6822d0a8f6313d01b7de9c574afe))
 * Use pandas with set_index for concat ([`b93290a`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/b93290ae733857855abe8197291dd047cf6c6fa8))
diff --git a/src/application/t2d/main.py b/src/application/t2d/main.py
index 1a2d31c7..f0537ee5 100644
--- a/src/application/t2d/main.py
+++ b/src/application/t2d/main.py
@@ -4,9 +4,10 @@
 maturity.
 """
 
+import logging
+
 import wandb
 
-import psycop_feature_generation.loaders.raw  # noqa pylint: disable=unused-import
 from application.t2d.modules.specify_features import get_feature_specs
 from psycop_feature_generation.application_modules.describe_flattened_dataset import (
     save_flattened_dataset_description_to_disk,
@@ -14,6 +15,7 @@
 from psycop_feature_generation.application_modules.flatten_dataset import (
     create_flattened_dataset,
 )
+from psycop_feature_generation.application_modules.loggers import init_root_logger
 from psycop_feature_generation.application_modules.project_setup import (
     get_project_info,
     init_wandb,
@@ -21,31 +23,25 @@
 from psycop_feature_generation.application_modules.save_dataset_to_disk import (
     split_and_save_dataset_to_disk,
 )
+from psycop_feature_generation.application_modules.wandb_utils import (
+    wandb_alert_on_exception,
+)
 from psycop_feature_generation.loaders.raw.load_visits import (
     physical_visits_to_psychiatry,
 )
 
+log = logging.getLogger()
 
+
+@wandb_alert_on_exception
 def main():
     """Main function for loading, generating and evaluating a flattened
     dataset."""
-    project_info = get_project_info(
-        project_name="t2d",
-    )
-
     feature_specs = get_feature_specs(project_info=project_info)
 
-    # Use wandb to keep track of your dataset generations
-    # Makes it easier to find paths on wandb, as well as
-    # allows monitoring and automatic slack alert on failure
-    init_wandb(
-        feature_specs=feature_specs,
-        project_info=project_info,
-    )
-
     flattened_df = create_flattened_dataset(
         feature_specs=feature_specs,
-        prediction_times_df=physical_visits_to_psychiatry(),
+        prediction_times_df=physical_visits_to_psychiatry(timestamps_only=True),
         drop_pred_times_with_insufficient_look_distance=False,
         project_info=project_info,
     )
@@ -53,13 +49,34 @@ def main():
     split_and_save_dataset_to_disk(
         flattened_df=flattened_df,
         project_info=project_info,
-        output_format="parquet",
     )
 
     save_flattened_dataset_description_to_disk(
-        feature_specs=feature_specs,
-        load_file_format="parquet",
         project_info=project_info,
     )
 
     wandb.log_artifact("poetry.lock", name="poetry_lock_file", type="poetry_lock")
+
+
+if __name__ == "__main__":
+    # Run elements that are required before wandb init first,
+    # then run the rest in main so you can wrap it all in
+    # wandb_alert_on_exception, which will send a Slack alert
+    # if you have wandb alerts set up in wandb
+    project_info = get_project_info(
+        project_name="t2d",
+    )
+
+    init_root_logger(project_info=project_info)
+
+    log.info(f"Stdout level is {logging.getLevelName(log.level)}")
+    log.debug("Debugging is still captured in the log file")
+
+    # Use wandb to keep track of your dataset generations
+    # Makes it easier to find paths on wandb, as well as
+    # allows monitoring and automatic Slack alert on failure
+    init_wandb(
+        project_info=project_info,
+    )
+
+    main()
diff --git a/src/application/t2d/modules/specify_features.py b/src/application/t2d/modules/specify_features.py
index 621a2398..1ca1c00b 100644
--- a/src/application/t2d/modules/specify_features.py
+++ b/src/application/t2d/modules/specify_features.py
@@ -1,16 +1,24 @@
 """Feature specification module."""
+import logging
+
 import numpy as np
+
+from psycop_feature_generation.application_modules.project_setup import ProjectInfo
 from timeseriesflattener.feature_spec_objects import (
-    AnySpec,
     BaseModel,
     OutcomeGroupSpec,
     OutcomeSpec,
     PredictorGroupSpec,
     PredictorSpec,
     StaticSpec,
+    _AnySpec,
 )
 
-from psycop_feature_generation.application_modules.project_setup import ProjectInfo
+from .loaders.t2d_loaders import (  # noqa pylint: disable=unused-import
+    timestamp_exclusion,
+)
+
+log = logging.getLogger(__name__)
 
 
 class SpecSet(BaseModel):
@@ -19,11 +27,11 @@ class SpecSet(BaseModel):
     temporal_predictors: list[PredictorSpec]
     static_predictors: list[StaticSpec]
     outcomes: list[OutcomeSpec]
-    metadata: list[AnySpec]
+    metadata: list[_AnySpec]
 
 
 def get_static_predictor_specs(project_info: ProjectInfo):
-    """Get static predictor specs."""
+    """Get static predictor specs"""
     return [
         StaticSpec(
             values_loader="sex_female",
@@ -33,8 +41,10 @@ def get_static_predictor_specs(project_info: ProjectInfo):
     ]
 
 
-def get_metadata_specs(project_info: ProjectInfo) -> list[AnySpec]:
-    """Get metadata specs."""
+def get_metadata_specs(project_info: ProjectInfo) -> list[_AnySpec]:
+    """Get metadata specs"""
+    log.info("–––––––– Generating metadata specs ––––––––")
+
     return [
         StaticSpec(
             values_loader="t2d",
@@ -51,7 +61,7 @@ def get_metadata_specs(project_info: ProjectInfo) -> list[AnySpec]:
         PredictorSpec(
             values_loader="hba1c",
             fallback=np.nan,
-            interval_days=9999,
+            lookbehind_days=9999,
             resolve_multiple_fn="count",
             allowed_nan_value_prop=0.0,
             prefix=project_info.prefix.eval,
@@ -60,13 +70,15 @@ def get_metadata_specs(project_info: ProjectInfo) -> list[AnySpec]:
 
 
 def get_outcome_specs(project_info: ProjectInfo):
-    """Get outcome specs."""
+    """Get outcome specs"""
+    log.info("–––––––– Generating outcome specs ––––––––")
+
     return OutcomeGroupSpec(
         values_loader=["t2d"],
         lookahead_days=[year * 365 for year in (1, 2, 3, 4, 5)],
         resolve_multiple_fn=["max"],
         fallback=[0],
-        incident=True,
+        incident=[True],
         allowed_nan_value_prop=[0],
         prefix=project_info.prefix.outcome,
     ).create_combinations()
@@ -74,68 +86,154 @@ def get_outcome_specs(project_info: ProjectInfo):
 
 
 def get_temporal_predictor_specs(project_info: ProjectInfo) -> list[PredictorSpec]:
     """Generate predictor spec list."""
+    log.info("–––––––– Generating temporal predictor specs ––––––––")
+
     resolve_multiple = ["max", "min", "mean", "latest", "count"]
     interval_days = [30, 90, 180, 365, 730]
     allowed_nan_value_prop = [0]
 
-    lab_results = PredictorGroupSpec(
+    lab_results = get_lab_result_specs(
+        resolve_multiple, interval_days, allowed_nan_value_prop
+    )
+
+    diagnoses = get_diagnoses_specs(
+        resolve_multiple, interval_days, allowed_nan_value_prop
+    )
+
+    medications = get_medication_specs(
+        resolve_multiple, interval_days, allowed_nan_value_prop
+    )
+
+    demographics = PredictorGroupSpec(
+        values_loader=["weight_in_kg", "height_in_cm", "bmi"],
+        lookbehind_days=interval_days,
+        resolve_multiple_fn=["latest"],
+        fallback=[np.nan],
+        allowed_nan_value_prop=allowed_nan_value_prop,
+        prefix=project_info.prefix.predictor,
+    ).create_combinations()
+
+    return lab_results + medications + diagnoses + demographics
+
+
+def get_medication_specs(resolve_multiple, interval_days, allowed_nan_value_prop):
+    log.info("–––––––– Generating medication specs ––––––––")
+
+    psychiatric_medications = PredictorGroupSpec(
         values_loader=(
-            "hba1c",
-            "alat",
-            "hdl",
-            "ldl",
-            "scheduled_glc",
-            "unscheduled_p_glc",
-            "triglycerides",
-            "fasting_ldl",
-            "crp",
-            "egfr",
-            "albumine_creatinine_ratio",
+            "antipsychotics",
+            "clozapine",
+            "top_10_weight_gaining_antipsychotics",
+            "lithium",
+            "valproate",
+            "lamotrigine",
+            "benzodiazepines",
+            "pregabaline",
+            "ssri",
+            "snri",
+            "tca",
+            "selected_nassa",
+            "benzodiazepine_related_sleeping_agents",
         ),
+        lookbehind_days=interval_days,
         resolve_multiple_fn=resolve_multiple,
+        fallback=[0],
+        allowed_nan_value_prop=allowed_nan_value_prop,
+    ).create_combinations()
+
+    lifestyle_medications = PredictorGroupSpec(
+        values_loader=(
+            "gerd_drugs",
+            "statins",
+            "antihypertensives",
+            "diuretics",
+        ),
         lookbehind_days=interval_days,
-        fallback=[np.nan],
+        resolve_multiple_fn=resolve_multiple,
+        fallback=[0],
         allowed_nan_value_prop=allowed_nan_value_prop,
-        prefix=project_info.prefix.predictor,
     ).create_combinations()
 
-    diagnoses = PredictorGroupSpec(
+    return psychiatric_medications + lifestyle_medications
+
+
+def get_diagnoses_specs(resolve_multiple, interval_days, allowed_nan_value_prop):
+    log.info("–––––––– Generating diagnoses specs ––––––––")
+
+    lifestyle_diagnoses = PredictorGroupSpec(
         values_loader=(
             "essential_hypertension",
             "hyperlipidemia",
             "polycystic_ovarian_syndrome",
             "sleep_apnea",
+            "gerd",
         ),
         resolve_multiple_fn=resolve_multiple,
         lookbehind_days=interval_days,
         fallback=[0],
         allowed_nan_value_prop=allowed_nan_value_prop,
-        prefix=project_info.prefix.predictor,
     ).create_combinations()
 
-    medications = PredictorGroupSpec(
-        values_loader=("antipsychotics",),
-        lookbehind_days=interval_days,
+    psychiatric_diagnoses = PredictorGroupSpec(
+        values_loader=(
+            "f0_disorders",
+            "f1_disorders",
+            "f2_disorders",
+            "f3_disorders",
+            "f4_disorders",
+            "f5_disorders",
+            "f6_disorders",
+            "f7_disorders",
+            "f8_disorders",
+            "hyperkinetic_disorders",
+        ),
         resolve_multiple_fn=resolve_multiple,
+        lookbehind_days=interval_days,
         fallback=[0],
         allowed_nan_value_prop=allowed_nan_value_prop,
-        prefix=project_info.prefix.predictor,
     ).create_combinations()
 
-    demographics = PredictorGroupSpec(
-        values_loader=["weight_in_kg", "height_in_cm", "bmi"],
+    return lifestyle_diagnoses + psychiatric_diagnoses
+
+
+def get_lab_result_specs(resolve_multiple, interval_days, allowed_nan_value_prop):
+    log.info("–––––––– Generating lab result specs ––––––––")
+
+    general_lab_results = PredictorGroupSpec(
+        values_loader=(
+            "alat",
+            "hdl",
+            "ldl",
+            "triglycerides",
+            "fasting_ldl",
+            "crp",
+        ),
+        resolve_multiple_fn=resolve_multiple,
+        lookbehind_days=interval_days,
+        fallback=[np.nan],
+        allowed_nan_value_prop=allowed_nan_value_prop,
+    ).create_combinations()
+
+    diabetes_lab_results = PredictorGroupSpec(
+        values_loader=(
+            "hba1c",
+            "scheduled_glc",
+            "unscheduled_p_glc",
+            "egfr",
+            "albumine_creatinine_ratio",
+        ),
+        resolve_multiple_fn=resolve_multiple,
         lookbehind_days=interval_days,
-        resolve_multiple_fn=["latest"],
         fallback=[np.nan],
         allowed_nan_value_prop=allowed_nan_value_prop,
-        prefix=project_info.prefix.predictor,
     ).create_combinations()
 
-    return lab_results + diagnoses + medications + demographics
+    return general_lab_results + diabetes_lab_results
 
 
-def get_feature_specs(project_info: ProjectInfo) -> list[AnySpec]:
+def get_feature_specs(project_info: ProjectInfo) -> list[_AnySpec]:
     """Get a spec set."""
+
     return (
         get_temporal_predictor_specs(project_info=project_info)
         + get_static_predictor_specs(project_info=project_info)
diff --git a/src/psycop_feature_generation/application_modules/flatten_dataset.py b/src/psycop_feature_generation/application_modules/flatten_dataset.py
index 5f8c194a..64012a5a 100644
--- a/src/psycop_feature_generation/application_modules/flatten_dataset.py
+++ b/src/psycop_feature_generation/application_modules/flatten_dataset.py
@@ -1,20 +1,20 @@
 """Flatten the dataset."""
 import pandas as pd
 import psutil
-from timeseriesflattener.feature_cache.cache_to_disk import DiskCache
-from timeseriesflattener.feature_spec_objects import AnySpec
-from timeseriesflattener.flattened_dataset import TimeseriesFlattener
 
 from psycop_feature_generation.application_modules.project_setup import ProjectInfo
 from psycop_feature_generation.application_modules.wandb_utils import (
     wandb_alert_on_exception,
 )
 from psycop_feature_generation.loaders.raw.load_demographic import birthdays
+from timeseriesflattener.feature_cache.cache_to_disk import DiskCache
+from timeseriesflattener.feature_spec_objects import _AnySpec
+from timeseriesflattener.flattened_dataset import TimeseriesFlattener
 
 
 @wandb_alert_on_exception
 def create_flattened_dataset(
-    feature_specs: list[AnySpec],
+    feature_specs: list[_AnySpec],
     prediction_times_df: pd.DataFrame,
     drop_pred_times_with_insufficient_look_distance: bool,
     project_info: ProjectInfo,
@@ -22,7 +22,7 @@ def create_flattened_dataset(
     """Create flattened dataset.
 
     Args:
-        feature_specs (list[AnySpec]): List of feature specifications of any type.
+        feature_specs (list[_AnySpec]): List of feature specifications of any type.
         project_info (ProjectInfo): Project info.
         prediction_times_df (pd.DataFrame): Prediction times dataframe.
             Should contain entity_id and timestamp columns with col_names matching those in project_info.col_names.
@@ -37,10 +37,10 @@ def create_flattened_dataset(
         prediction_times_df=prediction_times_df,
         n_workers=min(
             len(feature_specs),
-            psutil.cpu_count(logical=False),
+            psutil.cpu_count(logical=True),
         ),
         cache=DiskCache(
-            feature_cache_dir=project_info.feature_set_path / "feature_cache",
+            feature_cache_dir=project_info.project_path / "feature_cache",
        ),
         drop_pred_times_with_insufficient_look_distance=drop_pred_times_with_insufficient_look_distance,
         predictor_col_name_prefix=project_info.prefix.predictor,
@@ -54,4 +54,6 @@ def create_flattened_dataset(
         date_of_birth_col_name="date_of_birth",
     )
 
+    flattened_dataset.add_spec(spec=feature_specs)
+
     return flattened_dataset.get_df()
diff --git a/src/psycop_feature_generation/application_modules/loggers.py b/src/psycop_feature_generation/application_modules/loggers.py
new file mode 100644
index 00000000..f5dc61d9
--- /dev/null
+++ b/src/psycop_feature_generation/application_modules/loggers.py
@@ -0,0 +1,42 @@
+"""Recommended logger to use."""
+import logging
+from datetime import datetime
+
+import coloredlogs
+
+from psycop_feature_generation.application_modules.project_setup import ProjectInfo
+
+
+def init_root_logger(
+    project_info: ProjectInfo,
+    stdout_log_level: int = logging.INFO,
+    log_file_level: int = logging.DEBUG,
+) -> None:
+    """Initializes the root logger with a file handler and a stream handler."""
+    # Get the root logger
+    root_log = logging.getLogger()
+
+    # Set the root logger's level to the minimum of the stdout and file log levels.
+    # The root logger acts as the trunk of a tree: only messages with a level
+    # equal to or higher than the root logger's level are passed on to
+    # its branches (its handlers)
+    root_log.setLevel(min(stdout_log_level, log_file_level))
+
+    # Install the coloredlogs module on the root logger
+    # to get prettier console output and to add a streamhandler,
+    # which will write all logging messages from the root logger to
+    # stdout
+    coloredlogs.install(
+        level=stdout_log_level,
+        fmt="%(asctime)s [%(levelname)s] %(message)s",
+    )
+
+    # Create a timestamped file handler which writes all logging messages from
+    # the root logger to a file
+    now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+    log_dir_path = project_info.project_path / "logs"
+    log_dir_path.mkdir(exist_ok=True, parents=True)
+
+    file_handler = logging.FileHandler(filename=log_dir_path / f"{now}.log")
+    file_handler.setLevel(log_file_level)
+    root_log.addHandler(file_handler)
diff --git a/src/psycop_feature_generation/application_modules/project_setup.py b/src/psycop_feature_generation/application_modules/project_setup.py
index 6694538e..08a50b99 100644
--- a/src/psycop_feature_generation/application_modules/project_setup.py
+++ b/src/psycop_feature_generation/application_modules/project_setup.py
@@ -1,14 +1,19 @@
 """Setup for the project."""
+import logging
 import sys
 import tempfile
 import time
 from pathlib import Path
-from typing import Literal, Sequence
+from typing import Literal
 
 import wandb
-from timeseriesflattener.feature_spec_objects import BaseModel, PredictorSpec
 
 from psycop_feature_generation.utils import RELATIVE_PROJECT_ROOT, SHARED_RESOURCES_PATH
+from timeseriesflattener.feature_spec_objects import (  # pylint: disable=no-name-in-module
+    BaseModel,
+)
+
+log = logging.getLogger(__name__)
 
 
 class Prefixes(BaseModel):
@@ -79,6 +84,7 @@ def get_project_info(
     Returns:
         tuple[Path, str]: Tuple of project path, and feature_set_id
     """
+    log.info("Setting up project")
     proj_path = SHARED_RESOURCES_PATH / project_name
 
     current_user = Path().home().name
@@ -98,24 +104,17 @@ def get_project_info(
 
 
 def init_wandb(
-    feature_specs: Sequence[PredictorSpec],
     project_info: ProjectInfo,
 ) -> None:
     """Initialise wandb logging. Allows to use wandb to track progress, send
     Slack notifications if failing, and track logs.
 
     Args:
-        feature_specs (Iterable[dict[str, Any]]): List of predictor specs.
         project_info (ProjectInfo): Project info.
""" feature_settings = { "feature_set_path": project_info.feature_set_path, - "predictor_list": [ - spec.__dict__ - for spec in feature_specs - if spec.prefix == project_info.prefix.predictor - ], } # on Overtaci, the wandb tmp directory is not automatically created, diff --git a/src/psycop_feature_generation/application_modules/wandb_utils.py b/src/psycop_feature_generation/application_modules/wandb_utils.py index c768f09f..b14e7a85 100644 --- a/src/psycop_feature_generation/application_modules/wandb_utils.py +++ b/src/psycop_feature_generation/application_modules/wandb_utils.py @@ -11,7 +11,7 @@ def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except Exception as e: - wandb.alert(traceback.format_exc()) + wandb.alert(title="Run crashed", text=traceback.format_exc()) raise e return wrapper diff --git a/src/psycop_feature_generation/data_checks/flattened/feature_describer.py b/src/psycop_feature_generation/data_checks/flattened/feature_describer.py index 1bb804c0..725a5b63 100644 --- a/src/psycop_feature_generation/data_checks/flattened/feature_describer.py +++ b/src/psycop_feature_generation/data_checks/flattened/feature_describer.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd from timeseriesflattener.feature_spec_objects import ( - AnySpec, + _AnySpec, PredictorSpec, StaticSpec, TemporalSpec, @@ -134,7 +134,7 @@ def generate_static_feature_description(series: pd.Series, predictor_spec: Stati def generate_feature_description_row( series: pd.Series, - predictor_spec: AnySpec, + predictor_spec: _AnySpec, ) -> dict: """Generate a row with feature description. diff --git a/src/psycop_feature_generation/loaders/raw/load_medications.py b/src/psycop_feature_generation/loaders/raw/load_medications.py index c4c95210..0eedfdca 100644 --- a/src/psycop_feature_generation/loaders/raw/load_medications.py +++ b/src/psycop_feature_generation/loaders/raw/load_medications.py @@ -1,4 +1,6 @@ """Loaders for medications.""" +# pylint: disable=missing-function-docstring +import logging from typing import Optional, Union import pandas as pd @@ -7,7 +9,7 @@ from psycop_feature_generation.loaders.raw.utils import load_from_codes from psycop_feature_generation.utils import data_loaders -# pylint: disable=missing-function-docstring +log = logging.getLogger(__name__) def load( @@ -43,7 +45,7 @@ def load( """ if load_prescribed: - msg.warn( + log.warning( "Beware, there are missing prescriptions until september 2016. " "Hereafter, data is complete. 
See the wiki (OBS: Medication) for more details.", ) diff --git a/src/psycop_feature_generation/loaders/raw/load_t2d_outcomes.py b/src/psycop_feature_generation/loaders/raw/load_t2d_outcomes.py index d707fcc9..dbdcf328 100644 --- a/src/psycop_feature_generation/loaders/raw/load_t2d_outcomes.py +++ b/src/psycop_feature_generation/loaders/raw/load_t2d_outcomes.py @@ -5,7 +5,6 @@ from typing import Optional import pandas as pd -from wasabi import msg from psycop_feature_generation.loaders.raw.sql_load import sql_load from psycop_feature_generation.utils import data_loaders @@ -13,8 +12,6 @@ @data_loaders.register("t2d") def t2d(n_rows: Optional[int] = None) -> pd.DataFrame: - msg.info("Loading t2d event times") - df = sql_load( "SELECT dw_ek_borger, timestamp FROM [fct].[psycop_t2d_first_diabetes_t2d] WHERE timestamp IS NOT NULL", database="USR_PS_FORSK", @@ -27,7 +24,6 @@ def t2d(n_rows: Optional[int] = None) -> pd.DataFrame: # 2 duplicates, dropping df = df.drop_duplicates(keep="first") - msg.good("Finished loading t2d event times") return df.reset_index(drop=True) @@ -46,6 +42,5 @@ def any_diabetes(n_rows: Optional[int] = None): df.rename(columns={"datotid_first_diabetes_any": "timestamp"}, inplace=True) df["timestamp"] = pd.to_datetime(df["timestamp"]).dt.tz_localize(None) - msg.good("Finished loading any_diabetes event times") output = df[["dw_ek_borger", "timestamp", "value"]] return output.reset_index(drop=True) diff --git a/src/psycop_feature_generation/loaders/raw/load_visits.py b/src/psycop_feature_generation/loaders/raw/load_visits.py index 3d9797a1..2b56856b 100644 --- a/src/psycop_feature_generation/loaders/raw/load_visits.py +++ b/src/psycop_feature_generation/loaders/raw/load_visits.py @@ -1,5 +1,6 @@ """Loaders for visits to psychiatry.""" +import logging from typing import Optional import pandas as pd @@ -8,6 +9,8 @@ from psycop_feature_generation.loaders.raw.sql_load import sql_load from psycop_feature_generation.utils import data_loaders +log = logging.getLogger(__name__) + @data_loaders.register("physical_visits") def physical_visits( @@ -88,15 +91,22 @@ def physical_visits( output_df["value"] = 1 - msg.good("Loaded physical visits") + log.info("Loaded physical visits") return output_df.reset_index(drop=True) @data_loaders.register("physical_visits_to_psychiatry") -def physical_visits_to_psychiatry(n_rows: Optional[int] = None) -> pd.DataFrame: +def physical_visits_to_psychiatry( + n_rows: Optional[int] = None, timestamps_only: bool = True +) -> pd.DataFrame: """Load physical visits to psychiatry.""" - return physical_visits(shak_code=6600, shak_sql_operator="=", n_rows=n_rows) + df = physical_visits(shak_code=6600, shak_sql_operator="=", n_rows=n_rows) + + if timestamps_only: + df = df.drop("value", axis=1) + + return df @data_loaders.register("physical_visits_to_somatic") diff --git a/src/timeseriesflattener b/src/timeseriesflattener new file mode 160000 index 00000000..087f843e --- /dev/null +++ b/src/timeseriesflattener @@ -0,0 +1 @@ +Subproject commit 087f843e48be74ddd35bd1b9afcb89b6c6bf1cc0
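A few reviewer notes on the patterns this diff introduces. First, the `wandb_alert_on_exception` decorator that `main.py` now wraps `main()` with: the corrected `wandb.alert(title=..., text=...)` call only works once a wandb run exists, which is exactly why `main.py` calls `init_wandb()` before `main()`. A minimal sketch of the decorator as this diff uses it; the `functools.wraps` line is my addition for preserving the wrapped function's metadata and is not part of the diff:

```python
import functools
import traceback

import wandb


def wandb_alert_on_exception(func):
    """Send a W&B alert (and a Slack message, if alerts are routed there) on any
    uncaught exception, then re-raise."""

    @functools.wraps(func)  # assumption: not in the diff, keeps func.__name__ intact
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Requires an active wandb run, so init_wandb() must have run first.
            wandb.alert(title="Run crashed", text=traceback.format_exc())
            raise e

    return wrapper
```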
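Second, the trunk-and-branches comment in the new `loggers.py` is the load-bearing idea: a record must first clear the root logger's level (the trunk) before any handler (a branch) sees it, and each handler then applies its own threshold. A self-contained, stdlib-only sketch of the same two-handler setup, without `coloredlogs` or `ProjectInfo`; the log file name here is illustrative:

```python
import logging

root = logging.getLogger()
root.setLevel(logging.DEBUG)  # trunk: min(INFO, DEBUG), so DEBUG records pass through

console = logging.StreamHandler()  # branch 1: console shows INFO and above
console.setLevel(logging.INFO)
root.addHandler(console)

file_handler = logging.FileHandler("example.log")  # branch 2: file captures everything
file_handler.setLevel(logging.DEBUG)
root.addHandler(file_handler)

log = logging.getLogger(__name__)
log.debug("Reaches the file only")  # cf. "Debugging is still captured in the log file"
log.info("Reaches stdout and the file")
```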
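Finally, on the size of what `get_temporal_predictor_specs` now builds: assuming `PredictorGroupSpec.create_combinations()` expands its list-valued fields as a cartesian product (the naming suggests this, but the diff does not show the implementation), the loader counts above can be sanity-checked with simple arithmetic:

```python
# Back-of-envelope count, under the cartesian-product assumption stated above.
n_aggregations = 5  # ["max", "min", "mean", "latest", "count"]
n_lookbehinds = 5  # [30, 90, 180, 365, 730]

n_lab = 6 + 5  # general + diabetes lab result loaders
n_diagnoses = 5 + 10  # lifestyle + psychiatric diagnosis loaders
n_medications = 13 + 4  # psychiatric + lifestyle medication loaders
n_demographics = 3  # resolved with "latest" only, i.e. a single aggregation

n_specs = (
    (n_lab + n_diagnoses + n_medications) * n_aggregations * n_lookbehinds
    + n_demographics * 1 * n_lookbehinds
)
print(n_specs)  # 1090 temporal predictor specs under these assumptions
```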