Skip to content

Commit

Permalink
Merge pull request #73 from lincc-frameworks/fix/issue/66
Browse files Browse the repository at this point in the history
Fix .to_flat() to return df with ArrowDtype'd Series
  • Loading branch information
hombit authored May 9, 2024
2 parents a196bd6 + c081fef commit 88c2aee
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/nested_pandas/series/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,15 @@ def to_flat(self, fields: list[str] | None = None) -> pd.DataFrame:
index = None
for field in fields:
list_array = cast(pa.ListArray, struct_array.field(field))
flat_array = list_array.flatten()
if index is None:
index = self.get_flat_index()
flat_series[field] = pd.Series(
list_array.flatten(),
flat_array,
index=pd.Series(index, name=self._series.index.name),
name=field,
copy=False,
dtype=pd.ArrowDtype(flat_array.type),
)

return pd.DataFrame(flat_series)
Expand Down
3 changes: 2 additions & 1 deletion tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def test_add_nested_with_flat_df():
base = base.add_nested(nested, "nested")

assert "nested" in base.columns
assert base.nested.nest.to_flat().equals(nested)
# to_flat() returns columns with pd.ArrowDtype, so compare values only and skip the dtype check
assert_frame_equal(base.nested.nest.to_flat(), nested, check_dtype=False)


def test_add_nested_with_flat_df_and_mismatched_index():
Expand Down
6 changes: 5 additions & 1 deletion tests/nested_pandas/series/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,14 @@ def test_to_flat():
index=[0, 0, 0, 1, 1, 1],
name="a",
copy=False,
dtype=pd.ArrowDtype(pa.float64()),
),
"b": pd.Series(
data=[-4.0, -5.0, -6.0, -3.0, -4.0, -5.0],
index=[0, 0, 0, 1, 1, 1],
name="b",
copy=False,
dtype=pd.ArrowDtype(pa.float64()),
),
},
index=pd.Index([0, 0, 0, 1, 1, 1], name="idx"),
Expand Down Expand Up @@ -140,6 +142,7 @@ def test_to_flat_with_fields():
index=[0, 0, 0, 1, 1, 1],
name="a",
copy=False,
dtype=pd.ArrowDtype(pa.float64()),
),
},
)
Expand Down Expand Up @@ -527,7 +530,7 @@ def test_to_flat_dropna():
"""

flat = pd.DataFrame(
data={"c": [0.0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
data={"c": [0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
)
nested = pack_flat(flat, name="nested")
Expand All @@ -542,4 +545,5 @@ def test_to_flat_dropna():
data={"c": [0.0, 2, 4, 1, 3, 1, 4, 1], "d": [5, 4, 7, 5, 1, 9, 3, 4]},
index=[0, 0, 0, 1, 1, 2, 2, 2],
),
check_dtype=False, # filtered's Series have pd.ArrowDtype, so only values are compared
)

0 comments on commit 88c2aee

Please sign in to comment.