From a4ad03c0f42adc25b6f30d1ff6a812e6d6c1fef6 Mon Sep 17 00:00:00 2001 From: Nassim Oufattole Date: Mon, 9 Sep 2024 06:05:25 +0000 Subject: [PATCH] Resolved review feedback. Added a base_model docstring. Pinned the polars version. Added a GitHub workflow test matrix over Python versions and removed the redundant run_command definition from test_configs --- .github/workflows/tests.yaml | 6 +++-- pyproject.toml | 2 +- src/MEDS_tabular_automl/base_model.py | 2 ++ tests/__init__.py | 0 tests/test_configs.py | 32 ++------------------------- tests/test_integration.py | 20 ++++++++--------- tests/test_tabularize.py | 3 --- 7 files changed, 19 insertions(+), 46 deletions(-) create mode 100644 tests/__init__.py diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index c96be0e..0ac7cd5 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -12,6 +12,8 @@ jobs: strategy: fail-fast: false + matrix: + python-version: ["3.11", "3.12"] timeout-minutes: 30 @@ -19,10 +21,10 @@ jobs: - name: Checkout uses: actions/checkout@v3 - - name: Set up Python 3.12 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: - python-version: "3.12" + python-version: ${{ matrix.python-version }} - name: Install packages run: | diff --git a/pyproject.toml b/pyproject.toml index 1b75489..a30e14b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "polars", "pyarrow", "loguru", "hydra-core==1.3.2", "numpy", "scipy<1.14.0", "pandas", "tqdm", "xgboost", + "polars==1.6.0", "pyarrow", "loguru", "hydra-core==1.3.2", "numpy", "scipy<1.14.0", "pandas", "tqdm", "xgboost", "scikit-learn", "hydra-optuna-sweeper", "hydra-joblib-launcher", "ml-mixins", "meds==0.3.3", "meds-transforms==0.0.7", ] diff --git a/src/MEDS_tabular_automl/base_model.py b/src/MEDS_tabular_automl/base_model.py index 35a9ccf..4663542 100644 --- a/src/MEDS_tabular_automl/base_model.py 
+++ b/src/MEDS_tabular_automl/base_model.py @@ -9,6 +9,8 @@ class BaseModel(ABC, TimeableMixin): + """Defines the interface for a model that can be trained and evaluated via the launch_model script.""" + @abstractmethod def __init__(self): pass diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_configs.py b/tests/test_configs.py index d0bbc45..5183214 100644 --- a/tests/test_configs.py +++ b/tests/test_configs.py @@ -2,44 +2,16 @@ root = rootutils.setup_root(__file__, dotenv=True, pythonpath=True, cwd=True) -import subprocess import hydra import polars as pl import pytest from hydra import compose, initialize -from omegaconf import DictConfig, OmegaConf +from omegaconf import DictConfig from MEDS_tabular_automl.sklearn_model import SklearnModel from MEDS_tabular_automl.xgboost_model import XGBoostModel - - -def run_command(script: str, args: list[str], hydra_kwargs: dict[str, str], test_name: str): - command_parts = [script] + args + [f"{k}={v}" for k, v in hydra_kwargs.items()] - command_out = subprocess.run(" ".join(command_parts), shell=True, capture_output=True) - stderr = command_out.stderr.decode() - stdout = command_out.stdout.decode() - if command_out.returncode != 0: - raise AssertionError(f"{test_name} failed!\nstdout:\n{stdout}\nstderr:\n{stderr}") - return stderr, stdout - - -def make_config_mutable(cfg): - if OmegaConf.is_config(cfg): - OmegaConf.set_readonly(cfg, False) - for key in cfg.keys(): - print(key) - # try: - cfg[key] = make_config_mutable(cfg[key]) - # except: - # import pdb; pdb.set_trace() - return cfg - # elif isinstance(cfg, list): - # return [make_config_mutable(item) for item in cfg] - # elif isinstance(cfg, dict): - # return {key: make_config_mutable(value) for key, value in cfg.items()} - else: - return cfg +from tests.test_integration import run_command @pytest.mark.parametrize( diff --git a/tests/test_integration.py b/tests/test_integration.py index 
d231be1..d623914 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -9,16 +9,6 @@ import polars as pl from hydra import compose, initialize -from test_tabularize import ( - CODE_COLS, - EXPECTED_STATIC_FILES, - MEDS_OUTPUTS, - NUM_SHARDS, - SPLITS_JSON, - STATIC_FIRST_COLS, - STATIC_PRESENT_COLS, - VALUE_COLS, -) from MEDS_tabular_automl.describe_codes import get_feature_columns from MEDS_tabular_automl.file_name import list_subdir_files @@ -30,6 +20,16 @@ get_unique_time_events_df, load_matrix, ) +from tests.test_tabularize import ( + CODE_COLS, + EXPECTED_STATIC_FILES, + MEDS_OUTPUTS, + NUM_SHARDS, + SPLITS_JSON, + STATIC_FIRST_COLS, + STATIC_PRESENT_COLS, + VALUE_COLS, +) def run_command(script: str, args: list[str], hydra_kwargs: dict[str, str], test_name: str): diff --git a/tests/test_tabularize.py b/tests/test_tabularize.py index 006252d..7cf975a 100644 --- a/tests/test_tabularize.py +++ b/tests/test_tabularize.py @@ -10,7 +10,6 @@ import polars as pl from hydra import compose, initialize -from loguru import logger from MEDS_tabular_automl.describe_codes import get_feature_columns from MEDS_tabular_automl.file_name import list_subdir_files @@ -30,8 +29,6 @@ load_matrix, ) -logger.disable("MEDS_tabular_automl") - SPLITS_JSON = """{"train/0": [239684, 1195293], "train/1": [68729, 814703], "tuning/0": [754281], "held_out/0": [1500733]}""" # noqa: E501 NUM_SHARDS = 4