From a4ad03c0f42adc25b6f30d1ff6a812e6d6c1fef6 Mon Sep 17 00:00:00 2001
From: Nassim Oufattole <noufattole@gmail.com>
Date: Mon, 9 Sep 2024 06:05:25 +0000
Subject: [PATCH] Resolved review feedback. Added a base_model docstring.
 Pinned the polars version. Added GitHub workflow test matrix over Python
 versions. Removed redundant run_command definition from test_configs.

---
 .github/workflows/tests.yaml          |  6 +++--
 pyproject.toml                        |  2 +-
 src/MEDS_tabular_automl/base_model.py |  2 ++
 tests/__init__.py                     |  0
 tests/test_configs.py                 | 32 ++-------------------------
 tests/test_integration.py             | 20 ++++++++---------
 tests/test_tabularize.py              |  3 ---
 7 files changed, 19 insertions(+), 46 deletions(-)
 create mode 100644 tests/__init__.py

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index c96be0e..0ac7cd5 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -12,6 +12,8 @@ jobs:
 
     strategy:
       fail-fast: false
+      matrix:
+        python-version: ["3.11", "3.12"]
 
     timeout-minutes: 30
 
@@ -19,10 +21,10 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v3
 
-      - name: Set up Python 3.12
+      - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v3
         with:
-          python-version: "3.12"
+          python-version: ${{ matrix.python-version }}
 
       - name: Install packages
         run: |
diff --git a/pyproject.toml b/pyproject.toml
index 1b75489..a30e14b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-  "polars", "pyarrow", "loguru", "hydra-core==1.3.2", "numpy", "scipy<1.14.0", "pandas", "tqdm", "xgboost",
+  "polars==1.6.0", "pyarrow", "loguru", "hydra-core==1.3.2", "numpy", "scipy<1.14.0", "pandas", "tqdm", "xgboost",
   "scikit-learn", "hydra-optuna-sweeper", "hydra-joblib-launcher", "ml-mixins", "meds==0.3.3", "meds-transforms==0.0.7",
 ]
 
diff --git a/src/MEDS_tabular_automl/base_model.py b/src/MEDS_tabular_automl/base_model.py
index 35a9ccf..4663542 100644
--- a/src/MEDS_tabular_automl/base_model.py
+++ b/src/MEDS_tabular_automl/base_model.py
@@ -9,6 +9,8 @@
 
 
 class BaseModel(ABC, TimeableMixin):
+    """Defines the interface for a model that can be trained and evaluated via the launch_model script."""
+
     @abstractmethod
     def __init__(self):
         pass
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_configs.py b/tests/test_configs.py
index d0bbc45..5183214 100644
--- a/tests/test_configs.py
+++ b/tests/test_configs.py
@@ -2,44 +2,16 @@
 
 root = rootutils.setup_root(__file__, dotenv=True, pythonpath=True, cwd=True)
 
-import subprocess
 
 import hydra
 import polars as pl
 import pytest
 from hydra import compose, initialize
-from omegaconf import DictConfig, OmegaConf
+from omegaconf import DictConfig
 
 from MEDS_tabular_automl.sklearn_model import SklearnModel
 from MEDS_tabular_automl.xgboost_model import XGBoostModel
-
-
-def run_command(script: str, args: list[str], hydra_kwargs: dict[str, str], test_name: str):
-    command_parts = [script] + args + [f"{k}={v}" for k, v in hydra_kwargs.items()]
-    command_out = subprocess.run(" ".join(command_parts), shell=True, capture_output=True)
-    stderr = command_out.stderr.decode()
-    stdout = command_out.stdout.decode()
-    if command_out.returncode != 0:
-        raise AssertionError(f"{test_name} failed!\nstdout:\n{stdout}\nstderr:\n{stderr}")
-    return stderr, stdout
-
-
-def make_config_mutable(cfg):
-    if OmegaConf.is_config(cfg):
-        OmegaConf.set_readonly(cfg, False)
-        for key in cfg.keys():
-            print(key)
-            # try:
-            cfg[key] = make_config_mutable(cfg[key])
-            # except:
-            #     import pdb; pdb.set_trace()
-        return cfg
-    # elif isinstance(cfg, list):
-    #     return [make_config_mutable(item) for item in cfg]
-    # elif isinstance(cfg, dict):
-    #     return {key: make_config_mutable(value) for key, value in cfg.items()}
-    else:
-        return cfg
+from tests.test_integration import run_command
 
 
 @pytest.mark.parametrize(
diff --git a/tests/test_integration.py b/tests/test_integration.py
index d231be1..d623914 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -9,16 +9,6 @@
 
 import polars as pl
 from hydra import compose, initialize
-from test_tabularize import (
-    CODE_COLS,
-    EXPECTED_STATIC_FILES,
-    MEDS_OUTPUTS,
-    NUM_SHARDS,
-    SPLITS_JSON,
-    STATIC_FIRST_COLS,
-    STATIC_PRESENT_COLS,
-    VALUE_COLS,
-)
 
 from MEDS_tabular_automl.describe_codes import get_feature_columns
 from MEDS_tabular_automl.file_name import list_subdir_files
@@ -30,6 +20,16 @@
     get_unique_time_events_df,
     load_matrix,
 )
+from tests.test_tabularize import (
+    CODE_COLS,
+    EXPECTED_STATIC_FILES,
+    MEDS_OUTPUTS,
+    NUM_SHARDS,
+    SPLITS_JSON,
+    STATIC_FIRST_COLS,
+    STATIC_PRESENT_COLS,
+    VALUE_COLS,
+)
 
 
 def run_command(script: str, args: list[str], hydra_kwargs: dict[str, str], test_name: str):
diff --git a/tests/test_tabularize.py b/tests/test_tabularize.py
index 006252d..7cf975a 100644
--- a/tests/test_tabularize.py
+++ b/tests/test_tabularize.py
@@ -10,7 +10,6 @@
 
 import polars as pl
 from hydra import compose, initialize
-from loguru import logger
 
 from MEDS_tabular_automl.describe_codes import get_feature_columns
 from MEDS_tabular_automl.file_name import list_subdir_files
@@ -30,8 +29,6 @@
     load_matrix,
 )
 
-logger.disable("MEDS_tabular_automl")
-
 SPLITS_JSON = """{"train/0": [239684, 1195293], "train/1": [68729, 814703], "tuning/0": [754281], "held_out/0": [1500733]}"""  # noqa: E501
 NUM_SHARDS = 4