Skip to content

Commit

Permalink
Change NestedDtype.construct_from_string to raise TypeError
Browse files Browse the repository at this point in the history
Previously it raised ValueError, which is inconsistent with the behavior
that pandas expects.

Fixes #22
  • Loading branch information
hombit committed Apr 10, 2024
1 parent 136f2c5 commit ae46e58
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/nested_pandas/series/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,17 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define
Raises
------
ValueError
TypeError
If the string is not a valid nested type string or if the element types
are parametric pyarrow types.
"""
if not string.startswith("nested<") or not string.endswith(">"):
raise ValueError("Not a valid nested type string, expected 'nested<...>'")
raise TypeError("Not a valid nested type string, expected 'nested<...>'")
fields_str = string.removeprefix("nested<").removesuffix(">")

field_strings = fields_str.split(", ")
if len(field_strings) == 0:
raise ValueError(
raise TypeError(

Check warning on line 83 in src/nested_pandas/series/dtype.py

View check run for this annotation

Codecov / codecov/patch

src/nested_pandas/series/dtype.py#L83

Added line #L83 was not covered by tests
"Not a valid nested type string, expected at least a single field inside "
"'nested<x: [type], ...>'"
)
Expand All @@ -90,12 +90,12 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define
try:
field_name, field_type = field_string.split(": ", maxsplit=1)
except ValueError as e:
raise ValueError(
raise TypeError(
"Not a valid nested type string, expected 'nested<x: [type], ...>', got invalid field "
f"string '{field_string}'"
) from e
if not field_type.startswith("[") or not field_type.endswith("]"):
raise ValueError(
raise TypeError(
"Not a valid nested type string, expected 'nested<x: [type], ...>', got invalid field "
f"type string '{field_type}'"
)
Expand All @@ -105,7 +105,7 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define
try:
pa_value_type = pa.type_for_alias(value_type)
except ValueError as e:
raise ValueError(
raise TypeError(
f"Parsing pyarrow specific parameters in the string is not supported yet: {value_type}. "
"Please use NestedDtype() or NestedDtype.from_fields() instead."
) from e
Expand Down
26 changes: 26 additions & 0 deletions tests/nested_pandas/series/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest
from nested_pandas import NestedDtype
from nested_pandas.series.ext_array import NestedExtensionArray
from nested_pandas.series.packer import pack_flat
from numpy.testing import assert_array_equal
from pandas.testing import assert_frame_equal, assert_series_equal

Expand Down Expand Up @@ -503,3 +504,28 @@ def test___len__():
series = pd.Series(struct_array, dtype=NestedDtype(struct_array.type), index=[0, 1])

assert len(series.nest) == 2


def test_to_flat_dropna():
    """Test that to_flat() gives a valid dataframe, based on GH22
    https://github.com/lincc-frameworks/nested-pandas/issues/22
    """
    # One missing value in column "c" (fifth row, index label 1).
    # NOTE: use ``np.nan``; the ``np.NaN`` alias was removed in NumPy 2.0.
    flat = pd.DataFrame(
        data={"c": [0.0, 2, 4, 1, np.nan, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    nested = pack_flat(flat, name="nested")

    new_flat = nested.nest.to_flat()
    # .dropna() was failing in the issue report
    filtered = new_flat.dropna(subset="c")

    # Expected result: the original flat frame without the row holding NaN in "c".
    assert_frame_equal(
        filtered,
        pd.DataFrame(
            data={"c": [0.0, 2, 4, 1, 3, 1, 4, 1], "d": [5, 4, 7, 5, 1, 9, 3, 4]},
            index=[0, 0, 0, 1, 1, 2, 2, 2],
        ),
    )
23 changes: 23 additions & 0 deletions tests/nested_pandas/series/test_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,29 @@ def test_name_vs_construct_from_string(fields):
assert dtype == NestedDtype.construct_from_string(dtype.name)


# Strings that NestedDtype.construct_from_string() must reject.
_INVALID_NESTED_DTYPE_STRINGS = (
    # Wrong outer form: anything but "nested<...>"
    "float",  # plain type name, not a nested type
    "nested(f: [int64])",  # parentheses used where angle brackets are required
    "ts<in64>",  # "ts" is a former name of the dtype; "nested" is used now
    "nested",  # type parameters are absent
    "nested<a: [int64]",  # closing ">" is missing
    # Malformed field list
    "nested<>",  # empty field list
    "nested<int64>",  # field has no name
    "nested<[int64]>",  # field has no name
    "nested<a:[int64]>",  # name/type separator must be ": " (with a space)
    "nested<a: int64>",  # element type is not wrapped in [] (nested list)
    # Unsupported element types
    "nested<a: [complex64]>",  # not an arrow type
    "nested<a: [list<item: double>]>",  # complex arrow types are not supported
)


@pytest.mark.parametrize("s", _INVALID_NESTED_DTYPE_STRINGS)
def test_construct_from_string_raises(s):
    """Check that construct_from_string() raises TypeError for an invalid string."""
    with pytest.raises(TypeError):
        NestedDtype.construct_from_string(s)


def test_construct_array_type():
    """Check that NestedDtype.construct_array_type() gives back the NestedExtensionArray class."""
    array_type = NestedDtype.construct_array_type()
    assert array_type is NestedExtensionArray

0 comments on commit ae46e58

Please sign in to comment.