feat(python): column selector support for DataFrame.melt and `LazyFrame.unnest` (#11662)
pola-rs · Oct 11, 2023 · 987afb8 · 987afb8
1 parent 468dd7d
commit 987afb8
Show file tree

Hide file tree

Showing 4 changed files with 28 additions and 26 deletions.
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
@@ -7041,8 +7041,8 @@ def pivot(
 
     def melt(
         self,
-        id_vars: Sequence[str] | str | None = None,
-        value_vars: Sequence[str] | str | None = None,
+        id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
+        value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
         variable_name: str | None = None,
         value_name: str | None = None,
     ) -> Self:
@@ -7052,17 +7052,17 @@ def melt(
         Optionally leaves identifiers set.
 
         This function is useful to massage a DataFrame into a format where one or more
-        columns are identifier variables (id_vars), while all other columns, considered
-        measured variables (value_vars), are "unpivoted" to the row axis, leaving just
+        columns are identifier variables (id_vars) while all other columns, considered
+        measured variables (value_vars), are "unpivoted" to the row axis leaving just
         two non-identifier columns, 'variable' and 'value'.
 
         Parameters
         ----------
         id_vars
-            Columns to use as identifier variables.
+            Column(s) or selector(s) to use as identifier variables.
         value_vars
-            Values to use as identifier variables.
-            If `value_vars` is empty all columns that are not in `id_vars` will be used.
+            Column(s) or selector(s) to use as value variables; if `value_vars`
+            is empty, all columns that are not in `id_vars` will be used.
         variable_name
             Name to give to the `variable` column. Defaults to "variable"
         value_name
@@ -7077,7 +7077,8 @@ def melt(
         ...         "c": [2, 4, 6],
         ...     }
         ... )
-        >>> df.melt(id_vars="a", value_vars=["b", "c"])
+        >>> import polars.selectors as cs
+        >>> df.melt(id_vars="a", value_vars=cs.numeric())
         shape: (6, 3)
         ┌─────┬──────────┬───────┐
         │ a   ┆ variable ┆ value │
@@ -7093,14 +7094,9 @@ def melt(
         └─────┴──────────┴───────┘
 
         """
-        if isinstance(value_vars, str):
-            value_vars = [value_vars]
-        if isinstance(id_vars, str):
-            id_vars = [id_vars]
-        if value_vars is None:
-            value_vars = []
-        if id_vars is None:
-            id_vars = []
+        value_vars = [] if value_vars is None else _expand_selectors(self, value_vars)
+        id_vars = [] if id_vars is None else _expand_selectors(self, id_vars)
+
         return self._from_pydf(
             self._df.melt(id_vars, value_vars, value_name, variable_name)
         )

diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
@@ -5375,7 +5375,11 @@ def interpolate(self) -> Self:
         """
         return self.select(F.col("*").interpolate())
 
-    def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self:
+    def unnest(
+        self,
+        columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector],
+        *more_columns: ColumnNameOrSelector,
+    ) -> Self:
         """
         Decompose struct columns into separate columns for each of their fields.
 
@@ -5423,11 +5427,7 @@ def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self:
         └────────┴─────┴─────┴──────┴───────────┴───────┘
 
         """
-        if isinstance(columns, str):
-            columns = [columns]
-        if more_columns:
-            columns = list(columns)
-            columns.extend(more_columns)
+        columns = _expand_selectors(self, columns, *more_columns)
         return self._from_pyldf(self._ldf.unnest(columns))
 
     def merge_sorted(self, other: LazyFrame, key: str) -> Self:

diff --git a/py-polars/tests/unit/datatypes/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py
@@ -122,8 +122,11 @@ def test_struct_unnesting() -> None:
         }
     )
     for cols in ("foo", cs.ends_with("oo")):
-        out = df.unnest(cols)  # type: ignore[arg-type]
-        assert_frame_equal(out, expected)
+        out_eager = df.unnest(cols)  # type: ignore[arg-type]
+        assert_frame_equal(out_eager, expected)
+
+        out_lazy = df.lazy().unnest(cols)  # type: ignore[arg-type]
+        assert_frame_equal(out_lazy, expected.lazy())
 
     out = (
         df_base.lazy()

diff --git a/py-polars/tests/unit/operations/test_melt.py b/py-polars/tests/unit/operations/test_melt.py
@@ -6,8 +6,11 @@
 def test_melt() -> None:
     df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
     for _idv, _vv in (("A", ("B", "C")), (cs.string(), cs.integer())):
-        melted = df.melt(id_vars="A", value_vars=["B", "C"])
-        assert all(melted["value"] == [1, 3, 5, 2, 4, 6])
+        melted_eager = df.melt(id_vars="A", value_vars=["B", "C"])
+        assert all(melted_eager["value"] == [1, 3, 5, 2, 4, 6])
+
+        melted_lazy = df.lazy().melt(id_vars="A", value_vars=["B", "C"])
+        assert all(melted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6])
 
     melted = df.melt(id_vars="A", value_vars="B")
     assert all(melted["value"] == [1, 3, 5])