This repository has been archived by the owner on May 3, 2023. It is now read-only.

Merge pull request #118 from Aarhus-Psychiatry-Research/martbern/application_to_t2d

Misc. changes from usage
MartinBernstorff authored Dec 15, 2022
2 parents 363af5a + 8ae9f69 commit 7674e4a
Showing 14 changed files with 268 additions and 103 deletions.
1 change: 0 additions & 1 deletion .github/workflows/cache_version

This file was deleted.

40 changes: 20 additions & 20 deletions .github/workflows/main_test_and_release.yml
@@ -9,10 +9,10 @@ on:
pull_request:
push:
branches:
- main
- main

env:
cache-version: 0.0.4
cache-version: 0.0.6
poetry-version: 1.1.15
python-version: 3.9 # Change this number if you want to manually invalidate all caches

@@ -26,7 +26,7 @@ jobs:

# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} - ${{ matrix.os }} @ ${{ github.ref }}'
group: "${{ github.workflow }} - ${{ matrix.os }} @ ${{ github.ref }}"
cancel-in-progress: true

steps:
@@ -46,21 +46,21 @@
needs: test
if: ${{ github.ref == 'refs/heads/main' }}
steps:
# Checkout action is required for token to persist
- uses: actions/checkout@v2
with:
fetch-depth: 0
token: ${{ secrets.RELEASE_BOT }}
# Checkout action is required for token to persist
- uses: actions/checkout@v2
with:
fetch-depth: 0
token: ${{ secrets.RELEASE_BOT }}

- name: Python Semantic Release
uses: relekang/python-semantic-release@v7.32.0
with:
github_token: ${{ secrets.RELEASE_BOT }}
# Remember to copy the tool.semantic_release section from pyproject.toml
# as well
# To enable pypi,
# 1) Set upload_to_pypi to true in pyproject.toml and
# 2) Set the pypi_token in the repo
# 3) Uncomment the two lines below
repository_username: __token__
repository_password: ${{ secrets.PYPI_TOKEN }}
- name: Python Semantic Release
uses: relekang/python-semantic-release@v7.32.0
with:
github_token: ${{ secrets.RELEASE_BOT }}
# Remember to copy the tool.semantic_release section from pyproject.toml
# as well
# To enable pypi,
# 1) Set upload_to_pypi to true in pyproject.toml and
# 2) Set the pypi_token in the repo
# 3) Uncomment the two lines below
repository_username: __token__
repository_password: ${{ secrets.PYPI_TOKEN }}
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -63,7 +63,7 @@
* Reimplement ([`c99585f`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/c99585fdf9f9f407a69e0ead05f935d34ed86a63))
* Use lru cache decorator for values_df loading ([`4006818`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/40068187da20854fcca980872bc42b8a3a096cc9))
* Add support for loader kwargs ([`127f821`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/127f8215c35b792390595b890210baa0e8cf3591))
* Move values_df resolution to anyspec object ([`714e83f`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/714e83fd3722b298cdd256b06915659ca7a34259))
* Move values_df resolution to _AnySpec object ([`714e83f`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/714e83fd3722b298cdd256b06915659ca7a34259))
* Make date of birth output prefix a param ([`0ed1198`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/0ed11982ba1b239e5650d23dbfab707100e38137))
* Ensure that dfs are sorted and of same length before concat ([`84a4d65`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/84a4d65b731a6822d0a8f6313d01b7de9c574afe))
* Use pandas with set_index for concat ([`b93290a`](https://github.com/Aarhus-Psychiatry-Research/psycop-feature-generation/commit/b93290ae733857855abe8197291dd047cf6c6fa8))
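Two of the changelog entries above describe how the flattened feature dataframes are combined: the dfs are checked to be sorted and of equal length, then concatenated column-wise after `set_index` rather than merged repeatedly. Below is a minimal pandas sketch of that pattern; the column names (`dw_ek_borger`, `timestamp`, `pred_*`) and the toy data are illustrative assumptions, not taken from this repository.

```python
import pandas as pd

# Toy stand-ins for per-feature outputs; the real pipeline produces one
# flattened dataframe per feature spec, keyed by entity id and timestamp.
df_a = pd.DataFrame(
    {
        "dw_ek_borger": [1, 2],
        "timestamp": ["2021-01-01", "2021-01-02"],
        "pred_hba1c": [5.1, 6.2],
    }
)
df_b = pd.DataFrame(
    {
        "dw_ek_borger": [1, 2],
        "timestamp": ["2021-01-01", "2021-01-02"],
        "pred_bmi": [22.0, 27.5],
    }
)

dfs = [df_a, df_b]

# Guard against silent misalignment: every frame must have the same length
# before a column-wise concat.
assert len({len(df) for df in dfs}) == 1

# Sort on the shared keys and move them to the index, then concatenate
# column-wise on that index instead of chaining merges.
keys = ["dw_ek_borger", "timestamp"]
indexed = [df.sort_values(keys).set_index(keys) for df in dfs]
combined = pd.concat(indexed, axis=1).reset_index()

print(combined)
```

Concatenating on a shared index like this scales better than repeated `merge` calls when there are many feature columns, which appears to be the intent behind those changelog entries.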
51 changes: 34 additions & 17 deletions src/application/t2d/main.py
@@ -4,62 +4,79 @@
maturity.
"""

import logging

import wandb

import psycop_feature_generation.loaders.raw # noqa pylint: disable=unused-import
from application.t2d.modules.specify_features import get_feature_specs
from psycop_feature_generation.application_modules.describe_flattened_dataset import (
save_flattened_dataset_description_to_disk,
)
from psycop_feature_generation.application_modules.flatten_dataset import (
create_flattened_dataset,
)
from psycop_feature_generation.application_modules.loggers import init_root_logger
from psycop_feature_generation.application_modules.project_setup import (
get_project_info,
init_wandb,
)
from psycop_feature_generation.application_modules.save_dataset_to_disk import (
split_and_save_dataset_to_disk,
)
from psycop_feature_generation.application_modules.wandb_utils import (
wandb_alert_on_exception,
)
from psycop_feature_generation.loaders.raw.load_visits import (
physical_visits_to_psychiatry,
)

log = logging.getLogger()


@wandb_alert_on_exception
def main():
"""Main function for loading, generating and evaluating a flattened
dataset."""
project_info = get_project_info(
project_name="t2d",
)

feature_specs = get_feature_specs(project_info=project_info)

# Use wandb to keep track of your dataset generations
# Makes it easier to find paths on wandb, as well as
# allows monitoring and automatic slack alert on failure
init_wandb(
feature_specs=feature_specs,
project_info=project_info,
)

flattened_df = create_flattened_dataset(
feature_specs=feature_specs,
prediction_times_df=physical_visits_to_psychiatry(),
prediction_times_df=physical_visits_to_psychiatry(timestamps_only=True),
drop_pred_times_with_insufficient_look_distance=False,
project_info=project_info,
)

split_and_save_dataset_to_disk(
flattened_df=flattened_df,
project_info=project_info,
output_format="parquet",
)

save_flattened_dataset_description_to_disk(
feature_specs=feature_specs,
load_file_format="parquet",
project_info=project_info,
)

wandb.log_artifact("poetry.lock", name="poetry_lock_file", type="poetry_lock")


if __name__ == "__main__":
# Run elements that are required before wandb init first,
# then run the rest in main so you can wrap it all in
# wandb_alert_on_exception, which will send a slack alert
# if you have wandb alerts set up in wandb
project_info = get_project_info(
project_name="t2d",
)

init_root_logger(project_info=project_info)

log.info(f"Stdout level is {logging.getLevelName(log.level)}")
log.debug("Debugging is still captured in the log file")

# Use wandb to keep track of your dataset generations
# Makes it easier to find paths on wandb, as well as
# allows monitoring and automatic slack alert on failure
init_wandb(
project_info=project_info,
)

main()
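The restructured `__main__` block runs `get_project_info`, `init_root_logger`, and `init_wandb` before calling `main()`, so that everything inside `main()` is covered by the `wandb_alert_on_exception` decorator. As a rough sketch of that pattern (not the repository's actual implementation, which lives in `psycop_feature_generation.application_modules.wandb_utils`), such a decorator could look like the following; `wandb.alert` needs an active run, which is why the wandb initialisation has to happen before `main()` is entered.

```python
import functools
import logging

import wandb

log = logging.getLogger(__name__)


def wandb_alert_on_exception(func):
    """Illustrative sketch: raise a W&B alert when the wrapped function
    fails, then re-raise. The real decorator in the repository may differ."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # wandb.alert requires an active run, hence init_wandb() is
            # called in the __main__ block before main() runs.
            wandb.alert(title="Feature generation failed", text=str(e))
            log.exception("Unhandled exception in wrapped function")
            raise

    return wrapper
```

With this layout, an uncaught exception during dataset generation surfaces both in the log file and as a W&B alert (and a Slack message, if alerts are routed there), while the re-raise keeps the process exit code non-zero.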