diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 175e1028dbb6..312594fac845 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7041,8 +7041,8 @@ def pivot( def melt( self, - id_vars: Sequence[str] | str | None = None, - value_vars: Sequence[str] | str | None = None, + id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, ) -> Self: @@ -7052,17 +7052,17 @@ def melt( Optionally leaves identifiers set. This function is useful to massage a DataFrame into a format where one or more - columns are identifier variables (id_vars), while all other columns, considered - measured variables (value_vars), are "unpivoted" to the row axis, leaving just + columns are identifier variables (id_vars) while all other columns, considered + measured variables (value_vars), are "unpivoted" to the row axis leaving just two non-identifier columns, 'variable' and 'value'. Parameters ---------- id_vars - Columns to use as identifier variables. + Column(s) or selector(s) to use as identifier variables. value_vars - Values to use as identifier variables. - If `value_vars` is empty all columns that are not in `id_vars` will be used. + Column(s) or selector(s) to use as value variables; if `value_vars` + is empty all columns that are not in `id_vars` will be used. variable_name Name to give to the `variable` column. Defaults to "variable" value_name @@ -7077,7 +7077,8 @@ def melt( ... "c": [2, 4, 6], ... } ... 
) - >>> df.melt(id_vars="a", value_vars=["b", "c"]) + >>> import polars.selectors as cs + >>> df.melt(id_vars="a", value_vars=cs.numeric()) shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │ @@ -7093,14 +7094,9 @@ def melt( └─────┴──────────┴───────┘ """ - if isinstance(value_vars, str): - value_vars = [value_vars] - if isinstance(id_vars, str): - id_vars = [id_vars] - if value_vars is None: - value_vars = [] - if id_vars is None: - id_vars = [] + value_vars = [] if value_vars is None else _expand_selectors(self, value_vars) + id_vars = [] if id_vars is None else _expand_selectors(self, id_vars) + return self._from_pydf( self._df.melt(id_vars, value_vars, value_name, variable_name) ) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 2a01752a789e..7ed78e610b16 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5375,7 +5375,11 @@ def interpolate(self) -> Self: """ return self.select(F.col("*").interpolate()) - def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self: + def unnest( + self, + columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector], + *more_columns: ColumnNameOrSelector, + ) -> Self: """ Decompose struct columns into separate columns for each of their fields. 
@@ -5423,11 +5427,7 @@ def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self: └────────┴─────┴─────┴──────┴───────────┴───────┘ """ - if isinstance(columns, str): - columns = [columns] - if more_columns: - columns = list(columns) - columns.extend(more_columns) + columns = _expand_selectors(self, columns, *more_columns) return self._from_pyldf(self._ldf.unnest(columns)) def merge_sorted(self, other: LazyFrame, key: str) -> Self: diff --git a/py-polars/tests/unit/datatypes/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py index ffbd56836d55..b372be41f045 100644 --- a/py-polars/tests/unit/datatypes/test_struct.py +++ b/py-polars/tests/unit/datatypes/test_struct.py @@ -122,8 +122,11 @@ def test_struct_unnesting() -> None: } ) for cols in ("foo", cs.ends_with("oo")): - out = df.unnest(cols) # type: ignore[arg-type] - assert_frame_equal(out, expected) + out_eager = df.unnest(cols) # type: ignore[arg-type] + assert_frame_equal(out_eager, expected) + + out_lazy = df.lazy().unnest(cols) # type: ignore[arg-type] + assert_frame_equal(out_lazy, expected.lazy()) out = ( df_base.lazy() diff --git a/py-polars/tests/unit/operations/test_melt.py b/py-polars/tests/unit/operations/test_melt.py index 104e5d12dc02..12c12c45a581 100644 --- a/py-polars/tests/unit/operations/test_melt.py +++ b/py-polars/tests/unit/operations/test_melt.py @@ -6,8 +6,11 @@ def test_melt() -> None: df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) for _idv, _vv in (("A", ("B", "C")), (cs.string(), cs.integer())): - melted = df.melt(id_vars="A", value_vars=["B", "C"]) - assert all(melted["value"] == [1, 3, 5, 2, 4, 6]) + melted_eager = df.melt(id_vars="A", value_vars=["B", "C"]) + assert all(melted_eager["value"] == [1, 3, 5, 2, 4, 6]) + + melted_lazy = df.lazy().melt(id_vars="A", value_vars=["B", "C"]) + assert all(melted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6]) melted = df.melt(id_vars="A", value_vars="B") assert all(melted["value"] == [1, 3, 
5])