From 661b7f365e496c54fd1f16cd2c3d1cb3042db27f Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Mon, 16 Dec 2024 22:39:19 +1100
Subject: [PATCH 1/7] add pivot_wider_spec for pandas

---
 janitor/functions/__init__.py            |   8 +-
 janitor/functions/pivot.py               | 119 ++++++++++++++++++++
 tests/functions/test_pivot_wider_spec.py | 133 +++++++++++++++++++++++
 3 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 tests/functions/test_pivot_wider_spec.py

diff --git a/janitor/functions/__init__.py b/janitor/functions/__init__.py
index 8af27b3ff..6e0f14fc6 100644
--- a/janitor/functions/__init__.py
+++ b/janitor/functions/__init__.py
@@ -57,7 +57,12 @@
 from .limit_column_characters import limit_column_characters
 from .min_max_scale import min_max_scale
 from .move import move
-from .pivot import pivot_longer, pivot_longer_spec, pivot_wider
+from .pivot import (
+    pivot_longer,
+    pivot_longer_spec,
+    pivot_wider,
+    pivot_wider_spec,
+)
 from .process_text import process_text
 from .remove_columns import remove_columns
 from .remove_empty import remove_empty
@@ -138,6 +143,7 @@
     "pivot_longer",
     "pivot_longer_spec",
     "pivot_wider",
+    "pivot_wider_spec",
     "process_text",
     "remove_columns",
     "remove_empty",
diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py
index 9522ad084..893b41f6d 100644
--- a/janitor/functions/pivot.py
+++ b/janitor/functions/pivot.py
@@ -327,10 +327,14 @@ def pivot_longer(
             Should be either a single column name, or a list/tuple of
             column names.
             `index` should be a list of tuples if the columns are a MultiIndex.
+            Column selection is possible using the
+            [`select`][janitor.functions.select.select] syntax.
         column_names: Name(s) of columns to unpivot. Should be either
             a single column name or a list/tuple of column names.
             `column_names` should be a list of tuples
             if the columns are a MultiIndex.
+            Column selection is possible using the
+            [`select`][janitor.functions.select.select] syntax.
         names_to: Name of new column as a string that will contain
             what were previously the column names in `column_names`.
             The default is `variable` if no value is provided. It can
@@ -2380,3 +2384,118 @@ def _check_tuples_multiindex(indexer, args, param):
         )
 
     return args
+
+
+def pivot_wider_spec(
+    df: pd.DataFrame,
+    spec: pd.DataFrame,
+    index: list | tuple | str | Pattern = None,
+    reset_index: bool = True,
+) -> pd.DataFrame:
+    """
+    Provide specification to convert DataFrame from long to wide form.
+
+    !!! abstract "Version Changed"
+
+        - 0.24.0
+            - Added `reset_index`, `names_expand` and `index_expand` parameters.
+
+    Args:
+        df: A pandas DataFrame.
+        spec: A specification DataFrame.
+            Its first two columns must be '.name' and '.value', in that order.
+            '.name' holds the names of the columns in the output DataFrame.
+            '.value' holds the name of the source column(s) that serve as the values.
+            At least one additional column is required; these are the columns
+            flipped to wide form, and should already exist in the source DataFrame.
+        index: Name(s) of columns to use as identifier variables.
+            It should be either a single column name, or a list of column names.
+            If `index` is not provided, the DataFrame's index is used.
+            Column selection is possible using the
+            [`select`][janitor.functions.select.select] syntax.
+        reset_index: Determines whether to reset the `index`.
+            Applicable only if `index` is provided.
+
+    Returns:
+        A pandas DataFrame that has been pivoted from long to wide form.
+
+    Raises:
+        TypeError: If `spec` is not a pandas DataFrame.
+        KeyError: If `spec` has no '.name' or no '.value' column.
+        ValueError: If the columns of `spec` are not unique, do not start
+            with '.name' followed by '.value', or contain nothing else.
+    """  # noqa: E501
+    check("spec", spec, [pd.DataFrame])
+    check("reset_index", reset_index, [bool])
+    if not spec.columns.is_unique:
+        raise ValueError("Kindly ensure the spec's columns is unique.")
+    for label in (".name", ".value"):
+        if label not in spec.columns:
+            raise KeyError(
+                f"Kindly ensure the spec DataFrame has a `{label}` column."
+            )
+    if spec.columns.tolist()[:2] != [".name", ".value"]:
+        raise ValueError(
+            "The first two columns of the spec DataFrame "
+            "should be '.name' and '.value', "
+            "with '.name' coming before '.value'."
+        )
+    if spec.columns.size == 2:
+        raise ValueError(
+            "Kindly provide the column(s) "
+            "to use to make new frame’s columns"
+        )
+    columns = spec.columns[2:]
+    values = spec[".value"].unique()
+    if index is not None:
+        index = _select_index([index], df, axis="columns")
+        index = df.columns[index].tolist()
+    df = df.pivot(index=index, columns=columns, values=values)
+    _index = spec.columns[1:].tolist()
+    # Select '.name' explicitly: `.squeeze()` collapses a one-row spec
+    # to a scalar, which breaks the reindex/map below.
+    spec = spec.set_index(_index)[".name"]
+    df = df.reindex(columns=spec.index)
+    df.columns = df.columns.map(spec)
+    if reset_index and index:
+        return df.reset_index()
+    return df
+    # if _index:
+    #     df = df.set_index(_index)
+    # # use a pivot, then rename
+    # # the below code may work for polars?
+    # if len(grouper) == 1:
+    #     _grouper = grouper[0]
+    # else:
+    #     _grouper = grouper
+    # grouped = df.groupby(_grouper, sort=False, observed=True)
+    # mapper = defaultdict(dict)
+    # if len(grouper) > 1:
+    #     spec_grouper = pd.MultiIndex.from_frame(spec.loc[:, grouper])
+    # else:
+    #     spec_grouper = spec[grouper[0]]
+    # for grouper, old_name, new_name in zip(
+    #     spec_grouper, spec[".value"], spec[".name"]
+    # ):
+    #     mapper[grouper].update({old_name: new_name})
+    # frames = []
+    # for grouper, frame in grouped:
+    #     mapping = mapper[grouper]
+    #     frame = frame.loc[:, [*mapping]]
+    #     frame.columns = frame.columns.map(mapping)
+    #     frames.append(frame)
+    # frames = pd.concat(frames, axis=1)
+    # return frames
+
+
+# names_from -> .name -> columns(pandas)
+# values_from -> .value-> values(pandas)
+# index = df.columns
+# - .name.unique()
+# - .value.unique()
+# - remaining columns
+# if no idex, then df.index is used
+
+# where does .value and other columns intersect?
+# group by other columns, and select .value
+# df.groupby(index + other column)[.value.unique()]
diff --git a/tests/functions/test_pivot_wider_spec.py b/tests/functions/test_pivot_wider_spec.py
new file mode 100644
index 000000000..6e2a82ac2
--- /dev/null
+++ b/tests/functions/test_pivot_wider_spec.py
@@ -0,0 +1,133 @@
+import re
+
+import pandas as pd
+import pytest
+from pandas.testing import assert_frame_equal
+
+from janitor import pivot_wider_spec
+
+
+@pytest.fixture
+def df_checks():
+    """Long-form heights: one row per (famid, birth, age) combination."""
+    return pd.DataFrame(
+        [
+            {"famid": 1, "birth": 1, "age": 1, "ht": 2.8},
+            {"famid": 1, "birth": 1, "age": 2, "ht": 3.4},
+            {"famid": 1, "birth": 2, "age": 1, "ht": 2.9},
+            {"famid": 1, "birth": 2, "age": 2, "ht": 3.8},
+            {"famid": 1, "birth": 3, "age": 1, "ht": 2.2},
+            {"famid": 1, "birth": 3, "age": 2, "ht": 2.9},
+            {"famid": 2, "birth": 1, "age": 1, "ht": 2.0},
+            {"famid": 2, "birth": 1, "age": 2, "ht": 3.2},
+            {"famid": 2, "birth": 2, "age": 1, "ht": 1.8},
+            {"famid": 2, "birth": 2, "age": 2, "ht": 2.8},
+            {"famid": 2, "birth": 3, "age": 1, "ht": 1.9},
+            {"famid": 2, "birth": 3, "age": 2, "ht": 2.4},
+            {"famid": 3, "birth": 1, "age": 1, "ht": 2.2},
+            {"famid": 3, "birth": 1, "age": 2, "ht": 3.3},
+            {"famid": 3, "birth": 2, "age": 1, "ht": 2.3},
+            {"famid": 3, "birth": 2, "age": 2, "ht": 3.4},
+            {"famid": 3, "birth": 3, "age": 1, "ht": 2.1},
+            {"famid": 3, "birth": 3, "age": 2, "ht": 2.9},
+        ]
+    )
+
+
+spec = {".name": ["ht1", "ht2"], ".value": ["ht", "ht"], "age": [1, 2]}
+spec = pd.DataFrame(spec)  # ht at age 1 -> "ht1"; ht at age 2 -> "ht2"
+
+
+def test_spec_is_a_dataframe(df_checks):
+    """Raise TypeError if spec is not a DataFrame."""
+    with pytest.raises(
+        TypeError,
+        match="spec should be one of.+",
+    ):
+        df_checks.pipe(pivot_wider_spec, spec={".name": "name"})
+
+
+def test_spec_columns_has_dot_name(df_checks):
+    """Raise KeyError if '.name' is not in spec's columns."""
+    with pytest.raises(
+        KeyError,
+        match="Kindly ensure the spec DataFrame has a `.name` column.",
+    ):
+        df_checks.pipe(
+            pivot_wider_spec,
+            spec=spec.set_axis(labels=[".value", ".blabla", "age"], axis=1),
+        )
+
+
+def test_spec_columns_has_dot_value(df_checks):
+    """Raise KeyError if '.value' is not in spec's columns."""
+    with pytest.raises(
+        KeyError,
+        match="Kindly ensure the spec DataFrame has a `.value` column.",
+    ):
+        df_checks.pipe(
+            pivot_wider_spec,
+            spec=spec.set_axis(labels=[".name", ".blabla", "age"], axis=1),
+        )
+
+
+def test_spec_columns_name_value_order(df_checks):
+    """
+    Raise ValueError if '.name' and '.value'
+    are not, in that order, the first two labels
+    in spec's columns.
+    """
+    msg = "The first two columns of the spec DataFrame "
+    msg += "should be '.name' and '.value',.+"
+    with pytest.raises(
+        ValueError,
+        match=msg,
+    ):
+        df_checks.pipe(
+            pivot_wider_spec,
+            spec=spec.loc[:, [".value", ".name", "age"]],
+        )
+
+
+def test_spec_columns_len_2(df_checks):
+    """
+    Raise ValueError if '.name' and '.value'
+    are the only columns in spec (nothing left to pivot on).
+    """
+    msg = "Kindly provide the column(s) "
+    msg += "to use to make new frame’s columns"
+    with pytest.raises(
+        ValueError,
+        match=re.escape(msg),
+    ):
+        df_checks.pipe(
+            pivot_wider_spec,
+            spec=spec.loc[:, [".name", ".value"]],
+        )
+
+
+def test_spec_columns_not_unique(df_checks):
+    """Raise ValueError if the spec's columns are not unique."""
+    with pytest.raises(
+        ValueError, match="Kindly ensure the spec's columns is unique."
+    ):
+        df_checks.pipe(
+            pivot_wider_spec,
+            spec=spec.set_axis(labels=[".name", ".name", "age"], axis=1),
+        )
+
+
+def test_pivot_wider_spec(df_checks):
+    """
+    Output matches `DataFrame.pivot` on `age`, with 'ht'-prefixed columns.
+    """
+    expected = (
+        df_checks.pivot(index=["famid", "birth"], columns="age", values="ht")
+        .add_prefix("ht")
+        .rename_axis(columns=None)
+        .reset_index()
+    )
+    actual = df_checks.pipe(
+        pivot_wider_spec, spec=spec, index=["famid", "birth"]
+    )
+    assert_frame_equal(actual, expected)

From 0391ad93a46b61580bb65b57a9d0522c5c7a4906 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Mon, 16 Dec 2024 22:54:35 +1100
Subject: [PATCH 2/7] add examples

---
 janitor/functions/pivot.py | 79 +++++++++++++++++++++++++++++++++++---
 1 file changed, 74 insertions(+), 5 deletions(-)

diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py
index 893b41f6d..8f2851f8c 100644
--- a/janitor/functions/pivot.py
+++ b/janitor/functions/pivot.py
@@ -2392,13 +2392,82 @@ def pivot_wider_spec(
     index: list | tuple | str | Pattern = None,
     reset_index: bool = True,
 ) -> pd.DataFrame:
-    """
-    Provide specification to convert DataFrame from long to wide form.
+    """A declarative interface to pivot a DataFrame from long to wide form,
+    where you describe how the data will be pivoted,
+    using a DataFrame. This gives you, the user,
+    more control over pivoting, where you create a “spec”
+    data frame that describes exactly how data stored
+    in the column names becomes variables.
+    It can come in handy for situations where
+    `pd.DataFrame.pivot`
+    seems inadequate for the transformation.
 
-    !!! abstract "Version Changed"
+    !!! info "New in version 0.31.0"
 
-        - 0.24.0
-            - Added `reset_index`, `names_expand` and `index_expand` parameters.
+    Examples:
+        >>> import pandas as pd
+        >>> import janitor
+        >>> df = pd.DataFrame(
+        ... [
+        ...    {"famid": 1, "birth": 1, "age": 1, "ht": 2.8},
+        ...    {"famid": 1, "birth": 1, "age": 2, "ht": 3.4},
+        ...    {"famid": 1, "birth": 2, "age": 1, "ht": 2.9},
+        ...    {"famid": 1, "birth": 2, "age": 2, "ht": 3.8},
+        ...    {"famid": 1, "birth": 3, "age": 1, "ht": 2.2},
+        ...    {"famid": 1, "birth": 3, "age": 2, "ht": 2.9},
+        ...    {"famid": 2, "birth": 1, "age": 1, "ht": 2.0},
+        ...    {"famid": 2, "birth": 1, "age": 2, "ht": 3.2},
+        ...    {"famid": 2, "birth": 2, "age": 1, "ht": 1.8},
+        ...    {"famid": 2, "birth": 2, "age": 2, "ht": 2.8},
+        ...    {"famid": 2, "birth": 3, "age": 1, "ht": 1.9},
+        ...    {"famid": 2, "birth": 3, "age": 2, "ht": 2.4},
+        ...    {"famid": 3, "birth": 1, "age": 1, "ht": 2.2},
+        ...    {"famid": 3, "birth": 1, "age": 2, "ht": 3.3},
+        ...    {"famid": 3, "birth": 2, "age": 1, "ht": 2.3},
+        ...    {"famid": 3, "birth": 2, "age": 2, "ht": 3.4},
+        ...    {"famid": 3, "birth": 3, "age": 1, "ht": 2.1},
+        ...    {"famid": 3, "birth": 3, "age": 2, "ht": 2.9},
+        ... ]
+        ... )
+        >>> df
+            famid  birth  age   ht
+        0       1      1    1  2.8
+        1       1      1    2  3.4
+        2       1      2    1  2.9
+        3       1      2    2  3.8
+        4       1      3    1  2.2
+        5       1      3    2  2.9
+        6       2      1    1  2.0
+        7       2      1    2  3.2
+        8       2      2    1  1.8
+        9       2      2    2  2.8
+        10      2      3    1  1.9
+        11      2      3    2  2.4
+        12      3      1    1  2.2
+        13      3      1    2  3.3
+        14      3      2    1  2.3
+        15      3      2    2  3.4
+        16      3      3    1  2.1
+        17      3      3    2  2.9
+        >>> spec = {".name": ["ht1", "ht2"],
+        ...         ".value": ["ht", "ht"],
+        ...         "age": [1, 2]}
+        ... spec = pd.DataFrame(spec)
+        >>> spec
+          .name .value  age
+        0   ht1     ht    1
+        1   ht2     ht    2
+        >>> pivot_wider_spec(df=df,spec=spec)
+           famid  birth  ht1  ht2
+        0      1      1  2.8  3.4
+        1      1      2  2.9  3.8
+        2      1      3  2.2  2.9
+        3      2      1  2.0  3.2
+        4      2      2  1.8  2.8
+        5      2      3  1.9  2.4
+        6      3      1  2.2  3.3
+        7      3      2  2.3  3.4
+        8      3      3  2.1  2.9
 
     Args:
         df: A pandas DataFrame.

From 929d98553f8c429d1286e3752f215635a438e0f9 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Mon, 16 Dec 2024 22:58:30 +1100
Subject: [PATCH 3/7] cleanup

---
 janitor/functions/pivot.py | 41 +-------------------------------------
 1 file changed, 1 insertion(+), 40 deletions(-)

diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py
index 8f2851f8c..d73109078 100644
--- a/janitor/functions/pivot.py
+++ b/janitor/functions/pivot.py
@@ -2406,7 +2406,7 @@ def pivot_wider_spec(
 
     Examples:
         >>> import pandas as pd
-        >>> import janitor
+        >>> from janitor import pivot_wider_spec
         >>> df = pd.DataFrame(
         ... [
         ...    {"famid": 1, "birth": 1, "age": 1, "ht": 2.8},
@@ -2529,42 +2529,3 @@ def pivot_wider_spec(
     if reset_index and index:
         return df.reset_index()
     return df
-    # if _index:
-    #     df = df.set_index(_index)
-    # # use a pivot, then rename
-    # # the below code may work for polars?
-    # if len(grouper) == 1:
-    #     _grouper = grouper[0]
-    # else:
-    #     _grouper = grouper
-    # grouped = df.groupby(_grouper, sort=False, observed=True)
-    # mapper = defaultdict(dict)
-    # if len(grouper) > 1:
-    #     spec_grouper = pd.MultiIndex.from_frame(spec.loc[:, grouper])
-    # else:
-    #     spec_grouper = spec[grouper[0]]
-    # for grouper, old_name, new_name in zip(
-    #     spec_grouper, spec[".value"], spec[".name"]
-    # ):
-    #     mapper[grouper].update({old_name: new_name})
-    # frames = []
-    # for grouper, frame in grouped:
-    #     mapping = mapper[grouper]
-    #     frame = frame.loc[:, [*mapping]]
-    #     frame.columns = frame.columns.map(mapping)
-    #     frames.append(frame)
-    # frames = pd.concat(frames, axis=1)
-    # return frames
-
-
-# names_from -> .name -> columns(pandas)
-# values_from -> .value-> values(pandas)
-# index = df.columns
-# - .name.unique()
-# - .value.unique()
-# - remaining columns
-# if no idex, then df.index is used
-
-# where does .value and other columns intersect?
-# group by other columns, and select .value
-# df.groupby(index + other column)[.value.unique()]

From b74fe7e2160f7edd08f5188613d52e73e2cc2f2d Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Mon, 16 Dec 2024 23:02:49 +1100
Subject: [PATCH 4/7] fix example

---
 janitor/functions/pivot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py
index d73109078..71843c3a9 100644
--- a/janitor/functions/pivot.py
+++ b/janitor/functions/pivot.py
@@ -2452,7 +2452,7 @@ def pivot_wider_spec(
         >>> spec = {".name": ["ht1", "ht2"],
         ...         ".value": ["ht", "ht"],
         ...         "age": [1, 2]}
-        ... spec = pd.DataFrame(spec)
+        >>> spec = pd.DataFrame(spec)
         >>> spec
           .name .value  age
         0   ht1     ht    1

From a9a5965dc7ba60f2bb59334cb5b841e8045650be Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Mon, 16 Dec 2024 23:03:45 +1100
Subject: [PATCH 5/7] fix example

---
 janitor/functions/pivot.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py
index 71843c3a9..859e35bfd 100644
--- a/janitor/functions/pivot.py
+++ b/janitor/functions/pivot.py
@@ -2394,10 +2394,13 @@ def pivot_wider_spec(
 ) -> pd.DataFrame:
     """A declarative interface to pivot a DataFrame from long to wide form,
     where you describe how the data will be pivoted,
-    using a DataFrame. This gives you, the user,
+    using a DataFrame.
+
+    This gives you, the user,
     more control over pivoting, where you create a “spec”
     data frame that describes exactly how data stored
     in the column names becomes variables.
+
     It can come in handy for situations where
     `pd.DataFrame.pivot`
     seems inadequate for the transformation.

From b7ffe750cdc2d3d3af788cfb5b0103734e65f7a8 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Mon, 16 Dec 2024 23:07:35 +1100
Subject: [PATCH 6/7] fix failing test

---
 tests/functions/test_pivot_wider_spec.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/functions/test_pivot_wider_spec.py b/tests/functions/test_pivot_wider_spec.py
index 6e2a82ac2..a67b59be2 100644
--- a/tests/functions/test_pivot_wider_spec.py
+++ b/tests/functions/test_pivot_wider_spec.py
@@ -130,4 +130,7 @@ def test_pivot_wider_spec(df_checks):
     actual = df_checks.pipe(
         pivot_wider_spec, spec=spec, index=["famid", "birth"]
     )
-    assert_frame_equal(actual, expected)
+    assert_frame_equal(
+        actual.sort_values(expected.columns.tolist(), ignore_index=True),
+        expected.sort_values(expected.columns.tolist(), ignore_index=True),
+    )

From f629419ebd559d9ea3ec2632a98eb8c7fa206225 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli" <samuel.oranyeli@grow.inc>
Date: Mon, 16 Dec 2024 23:10:54 +1100
Subject: [PATCH 7/7] fix example

---
 janitor/functions/pivot.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py
index 859e35bfd..810243fb0 100644
--- a/janitor/functions/pivot.py
+++ b/janitor/functions/pivot.py
@@ -424,10 +424,13 @@ def pivot_longer_spec(
 ) -> pd.DataFrame:
     """A declarative interface to pivot a DataFrame from wide to long form,
     where you describe how the data will be unpivoted,
-    using a DataFrame. This gives you, the user,
+    using a DataFrame.
+
+    This gives you, the user,
     more control over unpivoting, where you create a “spec”
     data frame that describes exactly how data stored
     in the column names becomes variables.
+
     It can come in handy for situations where
     [`pivot_longer`][janitor.functions.pivot.pivot_longer]
     seems inadequate for the transformation.
@@ -2460,7 +2463,7 @@ def pivot_wider_spec(
           .name .value  age
         0   ht1     ht    1
         1   ht2     ht    2
-        >>> pivot_wider_spec(df=df,spec=spec)
+        >>> pivot_wider_spec(df=df,spec=spec, index=['famid','birth'])
            famid  birth  ht1  ht2
         0      1      1  2.8  3.4
         1      1      2  2.9  3.8