From 5a7ec3d27ee1fee1bb0cc70a6cd53a83ad7a49ed Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Mon, 10 Jun 2024 11:46:24 -0700 Subject: [PATCH 1/3] update reduce docstring --- src/nested_pandas/nestedframe/core.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index b3be03e..3b0a43f 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -362,7 +362,8 @@ def reduce(self, func, *args, **kwargs) -> NestedFrame: # type: ignore[override ---------- func : callable Function to apply to each nested dataframe. The first arguments to `func` should be which - columns to apply the function to. + columns to apply the function to. See the Notes for recommendations + on writing func outputs. args : positional arguments Positional arguments to pass to the function, the first *args should be the names of the columns to apply the function to. @@ -376,20 +377,18 @@ def reduce(self, func, *args, **kwargs) -> NestedFrame: # type: ignore[override Notes ----- - The recommend return value of func should be a `pd.Series` where the indices are the names of the - output columns in the dataframe returned by `reduce`. Note however that in cases where func - returns a single value there may be a performance benefit to returning the scalar value - rather than a `pd.Series`. + By default, `reduce` will produce a `NestedFrame` with enumerated + column names for each returned value of the function. For more useful + naming, it's recommended to have `func` return a dictionary where each + key is an output column of the dataframe returned by `reduce`. Example User Function: ``` import pandas as pd def my_sum(col1, col2): - return pd.Series( - [sum(col1), sum(col2)], - index=["sum_col1", "sum_col2"], - ) + '''reduce will return a NestedFrame with two columns''' + return {"sum_col1": sum(col1), "sum_col2": sum(col2)} ``` From a71aaf4cb1bfa7bdc11471617feddbe11a22360a Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Mon, 10 Jun 2024 11:52:29 -0700 Subject: [PATCH 2/3] update example --- src/nested_pandas/nestedframe/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 3b0a43f..32f0d46 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -383,12 +383,12 @@ def reduce(self, func, *args, **kwargs) -> NestedFrame: # type: ignore[override key is an output column of the dataframe returned by `reduce`. Example User Function: - ``` - import pandas as pd - def my_sum(col1, col2): - '''reduce will return a NestedFrame with two columns''' - return {"sum_col1": sum(col1), "sum_col2": sum(col2)} + >>> import pandas as pd + + >>> def my_sum(col1, col2): + >>> '''reduce will return a NestedFrame with two columns''' + >>> return {"sum_col1": sum(col1), "sum_col2": sum(col2)} ``` From 93571af28ddc192dd338147450761afc7a50d1fc Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Mon, 10 Jun 2024 11:55:16 -0700 Subject: [PATCH 3/3] tidy function --- src/nested_pandas/nestedframe/core.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 32f0d46..f0169b5 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -384,14 +384,10 @@ def reduce(self, func, *args, **kwargs) -> NestedFrame: # type: ignore[override Example User Function: - >>> import pandas as pd - >>> def my_sum(col1, col2): >>> '''reduce will return a NestedFrame with two columns''' >>> return {"sum_col1": sum(col1), "sum_col2": sum(col2)} - ``` - """ # Parse through the initial args to determine the columns to apply the function to requested_columns = []