From cb16c96cb0732327d8c36783ab2142661cd32cd4 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Wed, 10 Apr 2024 08:39:23 -0700 Subject: [PATCH 1/9] WIP: dropna --- src/nested_pandas/nestedframe/core.py | 134 ++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 4970930..638552f 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -1,7 +1,10 @@ # typing.Self and "|" union syntax don't exist in Python 3.9 from __future__ import annotations +import numpy as np import pandas as pd +from pandas._libs import lib +from pandas._typing import AnyAll, Axis, IndexLabel from nested_pandas.series import packer from nested_pandas.series.dtype import NestedDtype @@ -154,3 +157,134 @@ def query(self, expr) -> Self: # type: ignore[name-defined] # noqa: F821 # TODO: does not work with queries that empty the dataframe result[expr] = result[expr].nest.query_flat(exprs_to_use[expr]) return result + + def dropna( + self, + *, + axis: Axis = 0, + how: AnyAll | lib.NoDefault = lib.no_default, + thresh: int | lib.NoDefault = lib.no_default, + on_nested: bool = False, + subset: IndexLabel | None = None, + inplace: bool = False, + ignore_index: bool = False, + ) -> NestedFrame | None: + """ + Remove missing values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + Determine if rows or columns which contain missing values are + removed. + + * 0, or 'index' : Drop rows which contain missing values. + * 1, or 'columns' : Drop columns which contain missing value. + + Only a single axis is allowed. + + how : {'any', 'all'}, default 'any' + Determine if row or column is removed from DataFrame, when we have + at least one NA or all NA. + + * 'any' : If any NA values are present, drop that row or column. + * 'all' : If all values are NA, drop that row or column. + thresh : int, optional + Require that many non-NA values. Cannot be combined with how. + on_nested : str or bool, optional + If not False, applies the call to the nested dataframe in the + column with label equal to the provided string. If specified, + the nested dataframe should align with any columns given in + `subset`. + subset : column label or sequence of labels, optional + Labels along other axis to consider, e.g. if you are dropping rows + these would be a list of columns to include. + + Access nested columns using `nested_df.nested_col` (where + `nested_df` refers to a particular nested dataframe and + `nested_col` is a column of that nested dataframe). + inplace : bool, default False + Whether to modify the DataFrame rather than creating a new one. + ignore_index : bool, default ``False`` + If ``True``, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 2.0.0 + + Returns + ------- + DataFrame or None + DataFrame with NA entries dropped from it or None if ``inplace=True``. + + Notes + ----- + Operations that target a particular nested structure return a dataframe + with rows of that particular nested structure affected. + """ + + # determine target dataframe + + # first check the subset kwarg input + subset_target = [] + if subset: + if type(subset) is str: + subset = [subset] + for col in subset: + col = col.split(".")[0] if "." in col else col + if col in self.nested_columns: + subset_target.append(col) + elif col in self.columns: + subset_target.append("base") + + # Check for 1 target + subset_target = np.unique(subset_target) + if len(subset_target) > 1: # prohibit multi-target operations + raise ValueError( + f"Targeted multiple nested structures ({target}), write one command per target dataframe" + ) + elif len(subset_target) == 0: + raise ValueError( + "Provided base columns or nested layer did not match any found in the nestedframe" + ) + subset_target = subset_target[0] + + # Next check the on_nested kwarg input + # import pdb;pdb.set_trace() + if on_nested: + if on_nested not in self.nested_columns: + raise ValueError("Provided nested layer not found in nested dataframes") + + # Resolve target layer + target = "base" + if on_nested and subset_target: + if on_nested != subset_target: + raise ValueError( + f"Provided on_nested={on_nested}, but subset columns are from {subset_target}. Make sure these are aligned or just use subset." + ) + else: + target = subset_target + elif on_nested: + target = on_nested + elif subset_target: + target = subset_target + + if target == "base": + return super().dropna( + axis=axis, how=how, thresh=thresh, subset=subset, inplace=inplace, ignore_index=ignore_index + ) + else: + if subset is not None: + subset = [col.split(".")[-1] for col in subset] + # import pdb;pdb.set_trace() + self[target] = packer.pack_flat( + self[target] + .nest.to_flat() + .dropna( + axis=axis, + how=how, + thresh=thresh, + subset=subset, + inplace=inplace, + ignore_index=ignore_index, + ) + ) + return self From 8d902f1696116439cad61edc44945a94dadc2698 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Wed, 10 Apr 2024 13:29:39 -0700 Subject: [PATCH 2/9] add initial tests; formatting --- src/nested_pandas/nestedframe/core.py | 17 +++++------ .../nestedframe/test_nestedframe.py | 29 +++++++++++++++++++ 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 638552f..4667be0 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -226,7 +226,7 @@ def dropna( # first check the subset kwarg input subset_target = [] if subset: - if type(subset) is str: + if isinstance(subset, str): subset = [subset] for col in subset: col = col.split(".")[0] if "." in col else col @@ -239,7 +239,7 @@ def dropna( subset_target = np.unique(subset_target) if len(subset_target) > 1: # prohibit multi-target operations raise ValueError( - f"Targeted multiple nested structures ({target}), write one command per target dataframe" + f"Targeted multiple nested structures ({subset_target}), write one command per target dataframe" # noqa ) elif len(subset_target) == 0: raise ValueError( @@ -248,24 +248,22 @@ def dropna( subset_target = subset_target[0] # Next check the on_nested kwarg input - # import pdb;pdb.set_trace() - if on_nested: - if on_nested not in self.nested_columns: - raise ValueError("Provided nested layer not found in nested dataframes") + if on_nested not in self.nested_columns: + raise ValueError("Provided nested layer not found in nested dataframes") # Resolve target layer target = "base" if on_nested and subset_target: if on_nested != subset_target: raise ValueError( - f"Provided on_nested={on_nested}, but subset columns are from {subset_target}. Make sure these are aligned or just use subset." + f"Provided on_nested={on_nested}, but subset columns are from {subset_target}. Make sure these are aligned or just use subset." # noqa ) else: target = subset_target elif on_nested: - target = on_nested + target = str(on_nested) elif subset_target: - target = subset_target + target = str(subset_target) if target == "base": return super().dropna( @@ -274,7 +272,6 @@ def dropna( else: if subset is not None: subset = [col.split(".")[-1] for col in subset] - # import pdb;pdb.set_trace() self[target] = packer.pack_flat( self[target] .nest.to_flat() diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index 15f9d9f..8e3f1d7 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -1,3 +1,4 @@ +import numpy as np import pandas as pd import pytest from nested_pandas import NestedFrame @@ -101,3 +102,31 @@ def test_query(): nest_queried = base.query("(nested.c > 1) and (nested.d>2)") assert len(nest_queried.nested.nest.to_flat()) == 4 + + +def test_dropna(): + """Test that dropna works on all layers""" + + base = NestedFrame(data={"a": [1, 2, 3], "b": [2, np.NaN, 6]}, index=[0, 1, 2]) + + nested = pd.DataFrame( + data={"c": [0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}, + index=[0, 0, 0, 1, 1, 1, 2, 2, 2], + ) + + base = base.add_nested(nested, "nested") + + # Test basic functionality + dn_base = base.dropna(subset=["b"]) + assert len(dn_base) == 2 + assert len(dn_base["nested"].nest.to_flat() == 6) + + # Test on_nested kwarg + dn_on_nested = base.dropna(on_nested="nested") + assert len(dn_on_nested) == 3 + assert len(dn_on_nested["nested"].nest.to_flat() == 8) + + # Test hierarchical column subset + dn_hierarchical = base.dropna(subset="nested.c") + assert len(dn_hierarchical) == 3 + assert len(dn_hierarchical["nested"].nest.to_flat() == 8) From 26575386248b5e36eeae066e874c294778cc0dd3 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Wed, 10 Apr 2024 13:57:15 -0700 Subject: [PATCH 3/9] full test suite --- src/nested_pandas/nestedframe/core.py | 5 +++- .../nestedframe/test_nestedframe.py | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 4667be0..7371114 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -228,6 +228,7 @@ def dropna( if subset: if isinstance(subset, str): subset = [subset] + for col in subset: col = col.split(".")[0] if "." in col else col if col in self.nested_columns: @@ -235,6 +236,8 @@ def dropna( elif col in self.columns: subset_target.append("base") + # import pdb;pdb.set_trace() + # Check for 1 target subset_target = np.unique(subset_target) if len(subset_target) > 1: # prohibit multi-target operations @@ -248,7 +251,7 @@ def dropna( subset_target = subset_target[0] # Next check the on_nested kwarg input - if on_nested not in self.nested_columns: + if on_nested and on_nested not in self.nested_columns: raise ValueError("Provided nested layer not found in nested dataframes") # Resolve target layer diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index 8e3f1d7..d8f34c9 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -130,3 +130,32 @@ def test_dropna(): dn_hierarchical = base.dropna(subset="nested.c") assert len(dn_hierarchical) == 3 assert len(dn_hierarchical["nested"].nest.to_flat() == 8) + + +def test_dropna_errors(): + """Test that the various dropna exceptions trigger""" + + base = NestedFrame(data={"a": [1, 2, 3], "b": [2, np.NaN, 6]}, index=[0, 1, 2]) + + nested = pd.DataFrame( + data={"c": [0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}, + index=[0, 0, 0, 1, 1, 1, 2, 2, 2], + ) + + base = base.add_nested(nested, "nested") + + # Test multi-target + with pytest.raises(ValueError): + base.dropna(subset=["b", "nested.c"]) + + # Test no-target + with pytest.raises(ValueError): + base.dropna(subset=["not_nested.c"]) + + # Test bad on-nested value + with pytest.raises(ValueError): + base.dropna(on_nested="not_nested") + + # Test on-nested + subset disagreement + with pytest.raises(ValueError): + base.dropna(on_nested="nested", subset=["b"]) From e5458b17fcafc7614bc2103bb663587aaa3b06d3 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Wed, 10 Apr 2024 14:07:43 -0700 Subject: [PATCH 4/9] cover on_nested+subset case --- tests/nested_pandas/nestedframe/test_nestedframe.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index d8f34c9..b30015a 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -131,6 +131,11 @@ def test_dropna(): assert len(dn_hierarchical) == 3 assert len(dn_hierarchical["nested"].nest.to_flat() == 8) + # Test hierarchical column subset and on_nested + dn_hierarchical = base.dropna(on_nested="nested", subset="nested.c") + assert len(dn_hierarchical) == 3 + assert len(dn_hierarchical["nested"].nest.to_flat() == 8) + def test_dropna_errors(): """Test that the various dropna exceptions trigger""" From cc25440f28249ce932ff00f68aeadc65a24de6f7 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 11 Apr 2024 09:11:32 -0700 Subject: [PATCH 5/9] Update src/nested_pandas/nestedframe/core.py Co-authored-by: Konstantin Malanchev --- src/nested_pandas/nestedframe/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 7371114..5e16427 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -230,7 +230,7 @@ def dropna( subset = [subset] for col in subset: - col = col.split(".")[0] if "." in col else col + col = col.split(".")[0] if col in self.nested_columns: subset_target.append(col) elif col in self.columns: From 879d6caf173a17de65e73aece71b05dde545e11b Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 11 Apr 2024 09:11:39 -0700 Subject: [PATCH 6/9] Update src/nested_pandas/nestedframe/core.py Co-authored-by: Konstantin Malanchev --- src/nested_pandas/nestedframe/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 5e16427..731e569 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -236,8 +236,6 @@ def dropna( elif col in self.columns: subset_target.append("base") - # import pdb;pdb.set_trace() - # Check for 1 target subset_target = np.unique(subset_target) if len(subset_target) > 1: # prohibit multi-target operations From a014a01bd7b827f9589f1f7f60e979b45acccb9e Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 11 Apr 2024 10:37:01 -0700 Subject: [PATCH 7/9] address review comments; add with failing inplace test --- src/nested_pandas/nestedframe/core.py | 115 +++++++++++------- .../nestedframe/test_nestedframe.py | 29 +++++ 2 files changed, 97 insertions(+), 47 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 731e569..eeebd9c 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -158,6 +158,50 @@ def query(self, expr) -> Self: # type: ignore[name-defined] # noqa: F821 result[expr] = result[expr].nest.query_flat(exprs_to_use[expr]) return result + def _resolve_dropna_target(self, on_nested, subset): + """resolves the target layer for a given set of dropna kwargs""" + # first check the subset kwarg input + subset_target = [] + if subset: + if isinstance(subset, str): + subset = [subset] + + for col in subset: + col = col.split(".")[0] + if col in self.nested_columns: + subset_target.append(col) + elif col in self.columns: + subset_target.append("base") + else: + raise ValueError(f"Column name {col} not found in any base or nested columns") + + # Check for 1 target + subset_target = np.unique(subset_target) + if len(subset_target) > 1: # prohibit multi-target operations + raise ValueError( + f"Targeted multiple nested structures ({subset_target}), write one command per target dataframe" # noqa + ) + subset_target = str(subset_target[0]) + + # Next check the on_nested kwarg input + if on_nested and on_nested not in self.nested_columns: + raise ValueError("Provided nested layer not found in nested dataframes") + + # Resolve target layer + target = "base" + if on_nested and subset_target: + if on_nested != subset_target: + raise ValueError( + f"Provided on_nested={on_nested}, but subset columns are from {subset_target}. Make sure these are aligned or just use subset." # noqa + ) + else: + target = subset_target + elif on_nested: + target = str(on_nested) + elif subset_target: + target = str(subset_target) + return target, subset + def dropna( self, *, @@ -170,7 +214,7 @@ def dropna( ignore_index: bool = False, ) -> NestedFrame | None: """ - Remove missing values. + Remove missing values for one layer of the NestedFrame. Parameters ---------- @@ -219,60 +263,22 @@ def dropna( ----- Operations that target a particular nested structure return a dataframe with rows of that particular nested structure affected. + + Values for `on_nested` and `subset` should be consistent in pointing + to a single layer, multi-layer operations are not supported at this + time. """ # determine target dataframe - - # first check the subset kwarg input - subset_target = [] - if subset: - if isinstance(subset, str): - subset = [subset] - - for col in subset: - col = col.split(".")[0] - if col in self.nested_columns: - subset_target.append(col) - elif col in self.columns: - subset_target.append("base") - - # Check for 1 target - subset_target = np.unique(subset_target) - if len(subset_target) > 1: # prohibit multi-target operations - raise ValueError( - f"Targeted multiple nested structures ({subset_target}), write one command per target dataframe" # noqa - ) - elif len(subset_target) == 0: - raise ValueError( - "Provided base columns or nested layer did not match any found in the nestedframe" - ) - subset_target = subset_target[0] - - # Next check the on_nested kwarg input - if on_nested and on_nested not in self.nested_columns: - raise ValueError("Provided nested layer not found in nested dataframes") - - # Resolve target layer - target = "base" - if on_nested and subset_target: - if on_nested != subset_target: - raise ValueError( - f"Provided on_nested={on_nested}, but subset columns are from {subset_target}. Make sure these are aligned or just use subset." # noqa - ) - else: - target = subset_target - elif on_nested: - target = str(on_nested) - elif subset_target: - target = str(subset_target) + target, subset = self._resolve_dropna_target(on_nested, subset) if target == "base": return super().dropna( axis=axis, how=how, thresh=thresh, subset=subset, inplace=inplace, ignore_index=ignore_index ) - else: - if subset is not None: - subset = [col.split(".")[-1] for col in subset] + if subset is not None: + subset = [col.split(".")[-1] for col in subset] + if inplace: self[target] = packer.pack_flat( self[target] .nest.to_flat() @@ -286,3 +292,18 @@ def dropna( ) ) return self + new_df = self.copy() + new_df[target] = packer.pack_flat( + new_df[target] + .nest.to_flat() + .copy() + .dropna( + axis=axis, + how=how, + thresh=thresh, + subset=subset, + inplace=inplace, + ignore_index=ignore_index, + ) + ) + return new_df diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index b30015a..544282a 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -137,6 +137,35 @@ def test_dropna(): assert len(dn_hierarchical["nested"].nest.to_flat() == 8) +def test_dropna_inplace(): + """Test in-place behavior of dropna""" + + base = NestedFrame(data={"a": [1, 2, 3], "b": [2, np.NaN, 6]}, index=[0, 1, 2]) + + nested = pd.DataFrame( + data={"c": [0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}, + index=[0, 0, 0, 1, 1, 1, 2, 2, 2], + ) + + base = base.add_nested(nested, "nested") + + # Test inplace=False with base layer + dn_base = base.dropna(subset=["b"], inplace=False) + assert not dn_base.equals(base) + + # Test inplace=True with base layer + base.dropna(subset=["b"], inplace=True) + assert dn_base.equals(base) + + # Test inplace=False with nested layer + dn_base = base.dropna(on_nested="nested", inplace=False) + assert not dn_base.nested.nest.to_flat().equals(base.nested.nest.to_flat()) + + # Test inplace=True with nested layer + base.dropna(on_nested="nested", inplace=True) + assert dn_base.equals(base) + + def test_dropna_errors(): """Test that the various dropna exceptions trigger""" From 2bda706accd624af9f4f3e13ff3b2d32834be830 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 11 Apr 2024 12:52:20 -0700 Subject: [PATCH 8/9] fix inplace behavior --- src/nested_pandas/nestedframe/core.py | 27 +++++++++---------- .../nestedframe/test_nestedframe.py | 17 ++++++++++-- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index eeebd9c..604a350 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -5,6 +5,7 @@ import pandas as pd from pandas._libs import lib from pandas._typing import AnyAll, Axis, IndexLabel +from pandas.api.extensions import no_default from nested_pandas.series import packer from nested_pandas.series.dtype import NestedDtype @@ -206,8 +207,8 @@ def dropna( self, *, axis: Axis = 0, - how: AnyAll | lib.NoDefault = lib.no_default, - thresh: int | lib.NoDefault = lib.no_default, + how: AnyAll | lib.NoDefault = no_default, + thresh: int | lib.NoDefault = no_default, on_nested: bool = False, subset: IndexLabel | None = None, inplace: bool = False, @@ -279,24 +280,22 @@ def dropna( if subset is not None: subset = [col.split(".")[-1] for col in subset] if inplace: - self[target] = packer.pack_flat( - self[target] - .nest.to_flat() - .dropna( - axis=axis, - how=how, - thresh=thresh, - subset=subset, - inplace=inplace, - ignore_index=ignore_index, - ) + target_flat = self[target].nest.to_flat() + target_flat.dropna( + axis=axis, + how=how, + thresh=thresh, + subset=subset, + inplace=inplace, + ignore_index=ignore_index, ) + self[target] = packer.pack_flat(target_flat) return self + # Or if not inplace new_df = self.copy() new_df[target] = packer.pack_flat( new_df[target] .nest.to_flat() - .copy() .dropna( axis=axis, how=how, diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index 544282a..36afdad 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -137,10 +137,10 @@ def test_dropna(): assert len(dn_hierarchical["nested"].nest.to_flat() == 8) -def test_dropna_inplace(): +def test_dropna_inplace_base(): """Test in-place behavior of dropna""" - base = NestedFrame(data={"a": [1, 2, 3], "b": [2, np.NaN, 6]}, index=[0, 1, 2]) + base = NestedFrame(data={"a": [1, 2, 3], "b": [np.NaN, 4, 6]}, index=[0, 1, 2]) nested = pd.DataFrame( data={"c": [0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}, @@ -157,6 +157,19 @@ def test_dropna_inplace(): base.dropna(subset=["b"], inplace=True) assert dn_base.equals(base) + +def test_dropna_inplace_nested(): + """Test in-place behavior of dropna""" + + base = NestedFrame(data={"a": [1, 2, 3], "b": [np.NaN, 4, 6]}, index=[0, 1, 2]) + + nested = pd.DataFrame( + data={"c": [0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}, + index=[0, 0, 0, 1, 1, 1, 2, 2, 2], + ) + + base = base.add_nested(nested, "nested") + # Test inplace=False with nested layer dn_base = base.dropna(on_nested="nested", inplace=False) assert not dn_base.nested.nest.to_flat().equals(base.nested.nest.to_flat()) From b63a49ca406b240b6f616c41838ce5a4f0df5264 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 11 Apr 2024 12:56:26 -0700 Subject: [PATCH 9/9] use properties once --- src/nested_pandas/nestedframe/core.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 604a350..6ebef11 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -161,6 +161,10 @@ def query(self, expr) -> Self: # type: ignore[name-defined] # noqa: F821 def _resolve_dropna_target(self, on_nested, subset): """resolves the target layer for a given set of dropna kwargs""" + + nested_cols = self.nested_columns + columns = self.columns + # first check the subset kwarg input subset_target = [] if subset: @@ -169,9 +173,9 @@ def _resolve_dropna_target(self, on_nested, subset): for col in subset: col = col.split(".")[0] - if col in self.nested_columns: + if col in nested_cols: subset_target.append(col) - elif col in self.columns: + elif col in columns: subset_target.append("base") else: raise ValueError(f"Column name {col} not found in any base or nested columns") @@ -185,7 +189,7 @@ def _resolve_dropna_target(self, on_nested, subset): subset_target = str(subset_target[0]) # Next check the on_nested kwarg input - if on_nested and on_nested not in self.nested_columns: + if on_nested and on_nested not in nested_cols: raise ValueError("Provided nested layer not found in nested dataframes") # Resolve target layer