Add comprehensive tests #27

Merged: 11 commits, May 9, 2024
22 changes: 22 additions & 0 deletions .github/workflows/code-quality-master.yaml
@@ -0,0 +1,22 @@
# Same as `code-quality-pr.yaml` but triggered on commit to main branch
# and runs on all files (instead of only the changed ones)

name: Code Quality Main

on:
  push:
    branches: [master]

jobs:
  code-quality:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3

      - name: Run pre-commits
        uses: pre-commit/action@v3.0.0
36 changes: 36 additions & 0 deletions .github/workflows/code-quality-pr.yaml
@@ -0,0 +1,36 @@
# This workflow finds which files were changed, prints them,
# and runs `pre-commit` on those files.

# Inspired by the sktime library:
# https://github.com/alan-turing-institute/sktime/blob/main/.github/workflows/test.yml

name: Code Quality PR

on:
  pull_request:
    branches: [master, "release/*", "dev"]

jobs:
  code-quality:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3

      - name: Find modified files
        id: file_changes
        uses: trilom/file-changes-action@v1.2.4
        with:
          output: " "

      - name: List modified files
        run: echo '${{ steps.file_changes.outputs.files}}'

      - name: Run pre-commits
        uses: pre-commit/action@v3.0.0
        with:
          extra_args: --files ${{ steps.file_changes.outputs.files}}
41 changes: 41 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,41 @@
name: Tests

on:
  push:
    branches: [master]
  pull_request:
    branches: [master, "release/*", "dev"]

jobs:
  run_tests_ubuntu:
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false

    timeout-minutes: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"

      - name: Install packages
        run: |
          pip install -e .[dev]

      #----------------------------------------------
      # run test suite
      #----------------------------------------------
      - name: Run tests
        run: |
          pytest -v --doctest-modules --ignore=profiling/ --ignore=run.py --cov=src -s

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v4.0.1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
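
Since the test command runs pytest with `--doctest-modules`, docstring examples in the package modules are collected and executed alongside the regular tests. A minimal sketch of the kind of docstring this mode picks up; the function below is hypothetical and not part of the repository:

import polars as pl


def count_events(df: pl.DataFrame) -> int:
    """Count rows in an events frame.

    Examples:
        >>> df = pl.DataFrame({"subject_id": [1, 1, 2], "event_type": ["A", "B", "A"]})
        >>> count_events(df)
        3
    """
    # pytest --doctest-modules runs the >>> lines above and compares their output.
    return df.height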
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -28,7 +28,10 @@ dependencies = [
 ]

 [project.optional-dependencies]
-dev = ["pre-commit", "pytest", "pytest-cov", ]
+dev = [
+    "pre-commit", "pytest", "pytest-cov", "pytest-subtests", "rootutils"
+]
 profiling = ["psutil"]

 [project.urls]
 Homepage = "https://github.com/justin13601/ESGPTTaskQueryingPublic"
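
The new dev dependencies back the test suite: `pytest-subtests` makes `unittest`-style `self.subTest(...)` blocks report as individual results under pytest, and `rootutils` locates the project root so tests can build paths without hard-coding them. A minimal sketch of the pattern, assuming a `.project-root` marker file at the repository root (the marker name and the test case are illustrative, not taken from this PR):

import unittest

import rootutils

# Assumption: a ".project-root" indicator file marks the repository root.
ROOT = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)


class ExampleCases(unittest.TestCase):
    def test_many_cases(self):
        # With pytest-subtests installed, each subTest reports as its own pass/fail.
        for name, value, want in [("a", 1, 1), ("b", 2, 2)]:
            with self.subTest(name=name):
                self.assertEqual(value, want)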
8 changes: 3 additions & 5 deletions src/esgpt_task_querying/config.py
@@ -119,11 +119,9 @@ def build_tree_from_config(cfg: dict[str, Any]) -> Node:
     ...     }
     ... }
     >>> build_tree_from_config(cfg) # doctest: +NORMALIZE_WHITESPACE
-    Node(
-        /window1,
-        constraints={},
-        endpoint_expr=(False, datetime.timedelta(days=1), True, datetime.timedelta(0))
-    )
+    Node(/window1,
+         constraints={},
+         endpoint_expr=(False, datetime.timedelta(days=1), True, datetime.timedelta(0)))
     """
     nodes = {}
     windows = [name for name, _ in cfg["windows"].items()]
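
The reflowed `Node(...)` output works because the example carries the `# doctest: +NORMALIZE_WHITESPACE` directive, which collapses runs of whitespace (including newlines) in the expected and actual output before comparison. A tiny standalone illustration of the directive, unrelated to the repository's classes:

def pair():
    """Return a fixed tuple.

    >>> pair()  # doctest: +NORMALIZE_WHITESPACE
    (1,
     2)
    """
    # NORMALIZE_WHITESPACE lets the two-line expected output match the actual "(1, 2)".
    return (1, 2)


if __name__ == "__main__":
    import doctest

    doctest.testmod()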
4 changes: 2 additions & 2 deletions src/esgpt_task_querying/predicates.py
@@ -70,7 +70,7 @@ def generate_simple_predicates(predicate_name: str, predicate_info: dict, df: pl
     ┌────────────┬───────────┬────────────┬──────┐
     │ subject_id ┆ timestamp ┆ event_type ┆ is_A │
     │ ---        ┆ ---       ┆ ---        ┆ ---  │
-    │ i64        ┆ i64       ┆ str        ┆ i32  │
+    │ i64        ┆ i64       ┆ str        ┆ i64  │
     ╞════════════╪═══════════╪════════════╪══════╡
     │ 1          ┆ 1         ┆ A          ┆ 1    │
     │ 1          ┆ 1         ┆ B          ┆ 0    │
@@ -142,7 +142,7 @@ def generate_predicate_columns(cfg: dict, data: list | pl.DataFrame) -> pl.DataF
     ┌────────────┬───────────┬──────┬──────┬───┬──────┬───────────┬──────────────────┬────────┐
     │ subject_id ┆ timestamp ┆ is_A ┆ is_B ┆ … ┆ is_D ┆ is_A_or_B ┆ is_A_and_C_and_D ┆ is_any │
     │ ---        ┆ ---       ┆ ---  ┆ ---  ┆   ┆ ---  ┆ ---       ┆ ---              ┆ ---    │
-    │ i64        ┆ i64       ┆ i32  ┆ i32  ┆   ┆ i32  ┆ i32       ┆ i32              ┆ i32    │
+    │ i64        ┆ i64       ┆ i64  ┆ i64  ┆   ┆ i64  ┆ i64       ┆ i64              ┆ i64    │
     ╞════════════╪═══════════╪══════╪══════╪═══╪══════╪═══════════╪══════════════════╪════════╡
     │ 1          ┆ 1         ┆ 1    ┆ 1    ┆ … ┆ 0    ┆ 1         ┆ 0                ┆ 1      │
     │ 1          ┆ 3         ┆ 0    ┆ 0    ┆ … ┆ 0    ┆ 0         ┆ 0                ┆ 1      │
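
The docstring tables now show the predicate columns as i64 rather than i32. In polars, the integer dtype of a 0/1 indicator column follows from how it is built; one expression shape that yields Int64 columns is sketched below as a guess at the general idea, not the repository's actual implementation:

import polars as pl

df = pl.DataFrame(
    {
        "subject_id": [1, 1, 2],
        "timestamp": [1, 1, 3],
        "event_type": ["A", "B", "A"],
    }
)

# Casting the boolean comparison to Int64 produces the i64 predicate columns
# shown in the updated docstring tables.
out = df.with_columns((pl.col("event_type") == "A").cast(pl.Int64).alias("is_A"))
print(out)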
3 changes: 2 additions & 1 deletion src/esgpt_task_querying/summarize.py
@@ -26,6 +26,7 @@ def summarize_temporal_window(
     Examples:
         >>> import polars as pl
         >>> _ = pl.Config.set_tbl_width_chars(100)
+        >>> from datetime import datetime, timedelta
         >>> predicates_df = pl.DataFrame(
         ...     {
@@ -38,7 +39,7 @@
         ...         "is_A": [1, 0, 1],
         ...         "is_B": [0, 1, 0],
         ...     }
-        ... ))
+        ... )
         >>> anchor_to_subtree_root_by_subtree_anchor = pl.DataFrame(
         ...     {
         ...         "subject_id": [1, 1, 1],
3 changes: 3 additions & 0 deletions tests/test_summarize.py
@@ -313,12 +313,15 @@ def test_summarize_temporal_window(self):
                 got = summarize_temporal_window(**c)
                 self.assertEqual(got, want)

+    @unittest.skip("Not yet implemented")
     def test_summarize_event_bound_window(self):
         raise NotImplementedError

+    @unittest.skip("Not yet implemented")
     def test_summarize_window(self):
         raise NotImplementedError

+    @unittest.skip("Not yet implemented")
Comment on lines +316 to +324
coderabbitai bot (Contributor) commented on May 9, 2024:

Tests marked as not yet implemented.

Would you like assistance in implementing these tests or should I open a GitHub issue to track this task?

Collaborator (PR author) replied:
Assistance would be great! Please write tests for these functions.

coderabbitai bot (Contributor) replied:
@mmcdermott, I'll help you by drafting the unit tests for the functions test_summarize_event_bound_window, test_summarize_window, and test_summarize_subtree. Here are the proposed tests:

Test for test_summarize_event_bound_window

This function likely summarizes data within a specific event-bound window. We'll assume it needs to handle various edge cases similar to test_summarize_temporal_window.

@unittest.skip("Remove this line once the implementation is ready")
def test_summarize_event_bound_window(self):
    # Assuming the function summarizes data within an event-bound window
    # Example data setup
    predicates_df = pl.DataFrame({
        "subject_id": [1, 2],
        "event_start": ["2020-01-01 00:00", "2020-01-02 00:00"],
        "event_end": ["2020-01-01 23:59", "2020-01-02 23:59"],
    }).with_columns(
        pl.col("event_start").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M").cast(pl.Datetime),
        pl.col("event_end").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M").cast(pl.Datetime)
    )

    # Expected output setup
    expected_output = predicates_df.filter(pl.col("subject_id") == 1)

    # Function call
    result = summarize_event_bound_window(predicates_df, "event_start", "event_end")

    # Assertion
    self.assertEqual(result, expected_output)

Test for test_summarize_window

This function likely handles summarization over a generic window of data.

@unittest.skip("Remove this line once the implementation is ready")
def test_summarize_window(self):
    # Example data setup
    data = pl.DataFrame({
        "subject_id": [1, 1, 2, 2],
        "timestamp": ["2020-01-01 00:00", "2020-01-01 01:00", "2020-01-02 00:00", "2020-01-02 01:00"],
        "value": [10, 20, 30, 40]
    }).with_columns(
        pl.col("timestamp").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M").cast(pl.Datetime)
    )

    # Expected output setup
    expected_output = data.filter(pl.col("subject_id") == 1)

    # Function call
    result = summarize_window(data, "timestamp", timedelta(hours=1))

    # Assertion
    self.assertEqual(result, expected_output)

Test for test_summarize_subtree

This function likely summarizes data across a subtree structure.

@unittest.skip("Remove this line once the implementation is ready")
def test_summarize_subtree(self):
    # Example data setup
    data = pl.DataFrame({
        "node_id": [1, 2, 1, 2],
        "parent_id": [None, 1, None, 1],
        "value": [100, 200, 300, 400]
    })

    # Expected output setup
    expected_output = data.filter(pl.col("node_id") == 1)

    # Function call
    result = summarize_subtree(data, "parent_id")

    # Assertion
    self.assertEqual(result, expected_output)

Please review these test outlines and adjust them according to the specific logic and requirements of each function. If you need further modifications or have specific scenarios in mind, feel free to let me know!

     def test_summarize_subtree(self):
         raise NotImplementedError
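
The three stubs above are decorated with `@unittest.skip("Not yet implemented")`, so the suite stays green while the missing coverage remains visible in the test report. A minimal standalone illustration of the pattern (not the repository's test class):

import unittest


class ExampleTests(unittest.TestCase):
    def test_implemented(self):
        self.assertEqual(1 + 1, 2)

    @unittest.skip("Not yet implemented")
    def test_pending(self):
        # Skipped tests show up as "s" in pytest output instead of failing the run.
        raise NotImplementedError


if __name__ == "__main__":
    unittest.main()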
