Skip to content

Commit

Permalink
Merge pull request #148 from lincc-frameworks/handle_dot_cols
Browse files: browse the repository at this point in the history
Better support for the '.' character in column names outside of a nested context
  • Loading branch information
dougbrn authored Sep 30, 2024
2 parents aa34af1 + e8f805c commit 252765a
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 12 deletions.
31 changes: 19 additions & 12 deletions src/nested_pandas/nestedframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def all_columns(self) -> dict:
"""returns a dictionary of columns for each base/nested dataframe"""
all_columns = {"base": self.columns}
for column in self.columns:
if isinstance(self[column].dtype, NestedDtype):
if isinstance(self.dtypes[column], NestedDtype):
nest_cols = self[column].nest.fields
all_columns[column] = nest_cols
return all_columns
Expand All @@ -48,16 +48,18 @@ def nested_columns(self) -> list:
"""retrieves the base column names for all nested dataframes"""
nest_cols = []
for column in self.columns:
if isinstance(self[column].dtype, NestedDtype):
if isinstance(self.dtypes[column], NestedDtype):
nest_cols.append(column)
return nest_cols

def _is_known_hierarchical_column(self, colname) -> bool:
"""Determine whether a string is a known hierarchical column name"""
if "." in colname:
left, right = colname.split(".")
if left in self.nested_columns:
return right in self.all_columns[left]
base_name = colname.split(".")[0]
if base_name in self.nested_columns:
# TODO: only handles one level of nesting for now
nested_name = ".".join(colname.split(".")[1:])
return nested_name in self.all_columns[base_name]
return False
return False

Expand All @@ -68,13 +70,18 @@ def _is_known_column(self, colname) -> bool:
def __getitem__(self, item):
"""Adds custom __getitem__ behavior for nested columns"""

# If a nested column name is passed, return a flat series for that column
# flat series is chosen over list series for utility
# e.g. native ability to do something like ndf["nested.a"] + 3
if isinstance(item, str) and self._is_known_hierarchical_column(item):
nested, col = item.split(".")
return self[nested].nest.get_flat_series(col)
# Otherwise, do __getitem__ as normal
if isinstance(item, str):
# Pre-empt the nested check if the item is a base column
if item in self.columns:
return super().__getitem__(item)
# If a nested column name is passed, return a flat series for that column
# flat series is chosen over list series for utility
# e.g. native ability to do something like ndf["nested.a"] + 3
elif self._is_known_hierarchical_column(item):
# TODO: only handles one level of nesting for now
nested = item.split(".")[0]
col = ".".join(item.split(".")[1:])
return self[nested].nest.get_flat_series(col)
else:
return super().__getitem__(item)

Expand Down
11 changes: 11 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ def test_set_new_nested_col():
)


def test_get_dot_names():
    """Check that column names containing '.' still resolve outside of a nested context."""
    # Flat input whose column names deliberately contain '.' characters.
    flat = NestedFrame(
        {"a": [1, 2, 3, 4], ".b.": [1, 1, 3, 3], "R.A.": [3, None, 6, 5]},
        index=[1, 1, 2, 2],
    )
    nf = NestedFrame.from_flat(flat, base_columns=[".b."])

    # ".b." was requested as a base column; it must be looked up as a plain
    # column even though its name contains dots.
    assert len(nf[".b."]) == 2
    # "R.A." is reached through the "nested" prefix (presumably the default
    # nested-column name used by from_flat -- confirm); only the first '.'
    # separates the nested column from the field name.
    assert len(nf["nested.R.A."]) == 4


def test_add_nested_with_flat_df():
"""Test that add_nested correctly adds a nested column to the base df"""

Expand Down

0 comments on commit 252765a

Please sign in to comment.