Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change NestedDtype.construct_from_string to raise TypeError #23

Merged
merged 2 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions src/nested_pandas/series/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,32 +70,27 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define

Raises
------
ValueError
TypeError
If the string is not a valid nested type string or if the element types
are parametric pyarrow types.
"""
if not string.startswith("nested<") or not string.endswith(">"):
raise ValueError("Not a valid nested type string, expected 'nested<...>'")
raise TypeError("Not a valid nested type string, expected 'nested<...>'")
fields_str = string.removeprefix("nested<").removesuffix(">")

field_strings = fields_str.split(", ")
if len(field_strings) == 0:
raise ValueError(
"Not a valid nested type string, expected at least a single field inside "
"'nested<x: [type], ...>'"
)

fields = {}
for field_string in field_strings:
try:
field_name, field_type = field_string.split(": ", maxsplit=1)
except ValueError as e:
raise ValueError(
raise TypeError(
"Not a valid nested type string, expected 'nested<x: [type], ...>', got invalid field "
f"string '{field_string}'"
) from e
if not field_type.startswith("[") or not field_type.endswith("]"):
raise ValueError(
raise TypeError(
"Not a valid nested type string, expected 'nested<x: [type], ...>', got invalid field "
f"type string '{field_type}'"
)
Expand All @@ -105,7 +100,7 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define
try:
pa_value_type = pa.type_for_alias(value_type)
except ValueError as e:
raise ValueError(
raise TypeError(
f"Parsing pyarrow specific parameters in the string is not supported yet: {value_type}. "
"Please use NestedDtype() or NestedDtype.from_fields() instead."
) from e
Expand Down
26 changes: 26 additions & 0 deletions tests/nested_pandas/series/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest
from nested_pandas import NestedDtype
from nested_pandas.series.ext_array import NestedExtensionArray
from nested_pandas.series.packer import pack_flat
from numpy.testing import assert_array_equal
from pandas.testing import assert_frame_equal, assert_series_equal

Expand Down Expand Up @@ -503,3 +504,28 @@ def test___len__():
series = pd.Series(struct_array, dtype=NestedDtype(struct_array.type), index=[0, 1])

assert len(series.nest) == 2


def test_to_flat_dropna():
    """Test that to_flat() gives a valid dataframe, based on GH22

    The flat frame produced by ``.nest.to_flat()`` must support ``dropna``;
    that call was failing in the linked issue report.

    https://github.com/lincc-frameworks/nested-pandas/issues/22
    """

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed in NumPy 2.0.
    flat = pd.DataFrame(
        data={"c": [0.0, 2, 4, 1, np.nan, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    nested = pack_flat(flat, name="nested")

    new_flat = nested.nest.to_flat()
    # .dropna() was failing in the issue report
    filtered = new_flat.dropna(subset="c")

    # Only the single row holding the NaN in "c" (index 1, d == 3) is expected to be dropped.
    assert_frame_equal(
        filtered,
        pd.DataFrame(
            data={"c": [0.0, 2, 4, 1, 3, 1, 4, 1], "d": [5, 4, 7, 5, 1, 9, 3, 4]},
            index=[0, 0, 0, 1, 1, 2, 2, 2],
        ),
    )
24 changes: 24 additions & 0 deletions tests/nested_pandas/series/test_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,30 @@ def test_name_vs_construct_from_string(fields):
assert dtype == NestedDtype.construct_from_string(dtype.name)


@pytest.mark.parametrize(
    "s",
    [
        "float",  # not a nested type
        "nested(f: [int64])",  # must be <> instead
        "ts<in64>",  # 'ts' was a previous name, now we use 'nested'
        "nested",  # no type specified
        "nested<a: [int64]",  # missed closing bracket
        "nested<>",  # no field specified
        "nested<int64>",  # no field name specified
        "nested<[int64]>",  # no field name specified
        "nested<a:[int64]>",  # separator must be ": " with space
        "nested<a: [int64],b: [float32]>",  # separator must be ", " with space
        "nested<a: int64>",  # missed [] - nested list
        "nested<a: [complex64]>",  # not an arrow type
        "nested<a: [list<item: double>]>",  # complex arrow types are not supported
    ],
)
def test_construct_from_string_raises(s):
    """Check that every malformed dtype string is rejected with TypeError.

    Each parametrized case is a string that NestedDtype.construct_from_string()
    must refuse; the inline comment next to each case says why it is invalid.
    """
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        NestedDtype.construct_from_string(s)


def test_construct_array_type():
    """Check that NestedDtype.construct_array_type() hands back NestedExtensionArray itself."""
    array_type = NestedDtype.construct_array_type()
    assert array_type is NestedExtensionArray