Skip to content

Commit

Permalink
initialize nestedframe
Browse files Browse the repository at this point in the history
  • Loading branch information
dougbrn committed Apr 3, 2024
1 parent 05cc195 commit 5391e15
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/nested_pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .example_module import greetings, meaning
from .nestedframe import NestedFrame # noqa

# Import for registering
from .series.accessor import NestSeriesAccessor # noqa: F401
Expand Down
1 change: 1 addition & 0 deletions src/nested_pandas/nestedframe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .core import * # noqa
61 changes: 61 additions & 0 deletions src/nested_pandas/nestedframe/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# typing.Self and "|" union syntax don't exist in Python 3.9
from __future__ import annotations

import pandas as pd

from nested_pandas.series import packer


class NestedFrame(pd.DataFrame):
    """A pandas DataFrame extension with support for nested structure.

    A column is treated as "nested" when the Series stored under it exposes a
    ``nest`` attribute (the accessor registered by ``nested_pandas.series``).

    See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures
    """

    # normal properties
    _metadata = ["added_property"]

    @property
    def _constructor(self) -> Self:  # type: ignore[name-defined] # noqa: F821
        """Class pandas uses when an operation returns a new frame."""
        return NestedFrame

    @property
    def _constructor_expanddim(self) -> Self:  # type: ignore[name-defined] # noqa: F821
        """Class pandas uses when an operation would add a dimension.

        NOTE(review): a plain DataFrame has no higher-dimensional counterpart
        in pandas; confirm that returning NestedFrame here is intended.
        """
        return NestedFrame

    @property
    def all_columns(self) -> dict:
        """Return a dictionary of columns for each base/nested dataframe.

        The ``"base"`` key maps to this frame's own columns; every nested
        column name maps to the columns of the frame stored in its first row.
        """
        all_columns = {"base": self.columns}
        for column in self.nested_columns:
            # TODO: Improve access to columns (reads the first row, so an
            # empty frame with a nested column is not supported here).
            all_columns[column] = self[column].iloc[0].columns
        return all_columns

    @property
    def nested_columns(self) -> list:
        """Retrieve the base column names for all nested dataframes."""
        return [column for column in self.columns if hasattr(self[column], "nest")]

    def _is_known_hierarchical_column(self, colname) -> bool:
        """Determine whether a string is a known hierarchical column name.

        A hierarchical name has the form ``"<nested column>.<sub column>"``.
        Only the first ``"."`` separates the two parts, so names containing
        several dots are checked instead of raising on unpacking.

        Parameters
        ----------
        colname
            The candidate column label. Non-string labels are never
            hierarchical.

        Returns
        -------
        bool
            True if the part before the first dot is a nested column of this
            frame and the remainder is one of that nested frame's columns.
        """
        if not isinstance(colname, str):
            return False

        left, separator, right = colname.partition(".")
        if not separator or left not in self.nested_columns:
            return False
        return right in self.all_columns[left]

    def add_nested(self, nested, name) -> Self:  # type: ignore[name-defined] # noqa: F821
        """Pack a dataframe into a nested column.

        Parameters
        ----------
        nested
            The flat dataframe to pack; it is handed to ``packer.pack_flat``.
        name
            The name passed to the packer for the resulting series.

        Returns
        -------
        NestedFrame
            The result of ``assign``-ing the packed series to this frame under
            the packed series' name.
        """
        # Add sources to objects
        packed = packer.pack_flat(nested, name=name)
        label = packed.name
        return self.assign(**{f"{label}": packed})
76 changes: 76 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pandas as pd
from nested_pandas import NestedFrame


def test_nestedframe_construction():
    """Building a NestedFrame from plain data yields a NestedFrame instance."""
    columns = {"a": [1, 2, 3], "b": [2, 4, 6]}
    frame = NestedFrame(data=columns, index=[0, 1, 2])

    assert isinstance(frame, NestedFrame)


def test_all_columns():
    """Check all_columns before and after a nested frame is added."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    # With no nested columns only the "base" entry is reported.
    assert list(frame.all_columns.keys()) == ["base"]
    assert list(frame.all_columns["base"]) == list(frame.columns)

    flat_values = {
        "c": [0, 2, 4, 1, 4, 3, 1, 4, 1],
        "d": [5, 4, 7, 5, 3, 1, 9, 3, 4],
    }
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    frame = frame.add_nested(flat, "nested")

    # The nested column now appears with the flat frame's columns.
    assert list(frame.all_columns.keys()) == ["base", "nested"]
    assert list(frame.all_columns["nested"]) == list(flat.columns)


def test_nested_columns():
    """nested_columns lists exactly the columns that hold nested frames."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    flat_values = {
        "c": [0, 2, 4, 1, 4, 3, 1, 4, 1],
        "d": [5, 4, 7, 5, 3, 1, 9, 3, 4],
    }
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    with_nested = frame.add_nested(flat, "nested")

    assert with_nested.nested_columns == ["nested"]


def test_is_known_hierarchical_column():
    """Only "<nested column>.<sub column>" labels are recognised."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    flat_values = {
        "c": [0, 2, 4, 1, 4, 3, 1, 4, 1],
        "d": [5, 4, 7, 5, 3, 1, 9, 3, 4],
    }
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    frame = frame.add_nested(flat, "nested")

    # A real sub-column of the nested frame is known.
    assert frame._is_known_hierarchical_column("nested.c")
    # "b" lives on the base frame, not inside "nested".
    assert not frame._is_known_hierarchical_column("nested.b")
    # "base" is not itself a nested column.
    assert not frame._is_known_hierarchical_column("base.a")


def test_add_nested():
    """add_nested adds a column that flattens back to the original frame."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    flat_values = {
        "c": [0, 2, 4, 1, 4, 3, 1, 4, 1],
        "d": [5, 4, 7, 5, 3, 1, 9, 3, 4],
    }
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    frame = frame.add_nested(flat, "nested")

    assert "nested" in frame.columns

    # Flattening the packed column must give back the original flat frame.
    round_tripped = frame.nested.nest.to_flat()
    assert round_tripped.equals(flat)

0 comments on commit 5391e15

Please sign in to comment.