Skip to content

Commit

Permalink
fix: Handle none values
Browse files Browse the repository at this point in the history
  • Loading branch information
lewisjared committed Dec 17, 2024
1 parent 5fa348c commit dcb26f5
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 8 deletions.
15 changes: 8 additions & 7 deletions packages/ref/src/ref/datasets/cmip6.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,18 @@ def _fix_parent_variant_label(group: pd.DataFrame) -> pd.DataFrame:

data_catalog = data_catalog.groupby("instance_id").apply(_fix_parent_variant_label).reset_index(drop=True)

# EC-Earth3 uses "D" as a suffix for the branch_time_in_child and branch_time_in_parent columns
data_catalog["branch_time_in_child"] = pd.to_numeric(
data_catalog["branch_time_in_child"].astype(str).str.replace("D", ""), errors="raise"
)
data_catalog["branch_time_in_parent"] = pd.to_numeric(
data_catalog["branch_time_in_parent"].astype(str).str.replace("D", ""), errors="raise"
)
data_catalog["branch_time_in_child"] = _clean_branch_time(data_catalog["branch_time_in_child"])
data_catalog["branch_time_in_parent"] = _clean_branch_time(data_catalog["branch_time_in_parent"])

return data_catalog


def _clean_branch_time(branch_time: "pd.Series[str]") -> "pd.Series[float]":
    """
    Convert raw branch-time values into numeric values.

    Parameters
    ----------
    branch_time
        Raw ``branch_time_in_child`` / ``branch_time_in_parent`` values.
        Entries may carry a "D" marker (e.g. ``"0D"``), may be missing
        (``None``, ``nan``, ``pd.NA``) or may be the literal text ``"None"``.

    Returns
    -------
    :
        The values parsed by :func:`pandas.to_numeric`, with missing
        entries represented as ``nan``.

    Raises
    ------
    ValueError
        If a non-missing entry cannot be parsed as a number.
    """
    # EC-Earth3 uses "D" as a suffix for the branch_time_in_child and branch_time_in_parent columns.
    # regex=False keeps this a literal replacement regardless of the pandas version's default.
    text = branch_time.astype(str).str.replace("D", "", regex=False)

    # Decide what is missing from the *original* values rather than from their string form:
    # str(pd.NA) is "<NA>", which pd.to_numeric(errors="raise") would reject.
    # The literal text "None" is also treated as missing.
    missing = branch_time.isna() | text.eq("None")

    # Missing entries are blanked out (-> nan) before parsing; anything else must be numeric.
    return pd.to_numeric(text.where(~missing), errors="raise")


class CMIP6DatasetAdapter(DatasetAdapter):
"""
Adapter for CMIP6 datasets
Expand Down
28 changes: 27 additions & 1 deletion packages/ref/tests/unit/datasets/test_cmip6.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import datetime

import numpy as np
import pandas as pd
import pytest

from ref.datasets.cmip6 import CMIP6DatasetAdapter, _parse_datetime
from ref.datasets.cmip6 import CMIP6DatasetAdapter, _apply_fixes, _parse_datetime


@pytest.fixture
Expand Down Expand Up @@ -76,3 +77,28 @@ def test_load_local_datasets(self, esgf_data_dir, catalog_regression):
catalog_regression(
data_catalog.sort_values(["instance_id", "start_time"]), basename="cmip6_catalog_local"
)


def test_apply_fixes():
    """
    Check the result of ``_apply_fixes`` on a small hand-written catalog.

    The input has two rows for ``dataset_001`` with differing
    ``parent_variant_label`` values, a branch time carrying a "D" suffix
    and a missing ``branch_time_in_parent``. The expected frame has a
    single parent label per instance and numeric branch times, with the
    missing entry as ``nan``.
    """
    raw_columns = {
        "instance_id": ["dataset_001", "dataset_001", "dataset_002"],
        "parent_variant_label": ["r1i1p1f1", "r1i1p1f2", "r1i1p1f2"],
        "variant_label": ["r1i1p1f1", "r1i1p1f1", "r1i1p1f2"],
        "branch_time_in_child": ["0D", "12", "12.0"],
        "branch_time_in_parent": [None, "12", "12.0"],
    }
    expected_columns = {
        "instance_id": ["dataset_001", "dataset_001", "dataset_002"],
        "parent_variant_label": ["r1i1p1f1", "r1i1p1f1", "r1i1p1f2"],
        "variant_label": ["r1i1p1f1", "r1i1p1f1", "r1i1p1f2"],
        "branch_time_in_child": [0.0, 12.0, 12.0],
        "branch_time_in_parent": [np.nan, 12.0, 12.0],
    }

    fixed = _apply_fixes(pd.DataFrame(raw_columns))
    expected = pd.DataFrame(expected_columns)

    pd.testing.assert_frame_equal(fixed, expected)

0 comments on commit dcb26f5

Please sign in to comment.