Skip to content

Commit

Permalink
feat(python): column selector support for DataFrame.melt and `LazyF…
Browse files Browse the repository at this point in the history
…rame.unnest` (#11662)
  • Loading branch information
alexander-beedie authored Oct 11, 2023
1 parent 468dd7d commit 987afb8
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 26 deletions.
28 changes: 12 additions & 16 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7041,8 +7041,8 @@ def pivot(

def melt(
self,
id_vars: Sequence[str] | str | None = None,
value_vars: Sequence[str] | str | None = None,
id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
variable_name: str | None = None,
value_name: str | None = None,
) -> Self:
Expand All @@ -7052,17 +7052,17 @@ def melt(
Optionally leaves identifiers set.
This function is useful to massage a DataFrame into a format where one or more
columns are identifier variables (id_vars), while all other columns, considered
measured variables (value_vars), are "unpivoted" to the row axis, leaving just
columns are identifier variables (id_vars) while all other columns, considered
measured variables (value_vars), are "unpivoted" to the row axis leaving just
two non-identifier columns, 'variable' and 'value'.
Parameters
----------
id_vars
Columns to use as identifier variables.
Column(s) or selector(s) to use as identifier variables.
value_vars
Values to use as identifier variables.
If `value_vars` is empty all columns that are not in `id_vars` will be used.
Column(s) or selector(s) to use as value variables; if `value_vars`
is empty all columns that are not in `id_vars` will be used.
variable_name
Name to give to the `variable` column. Defaults to "variable"
value_name
Expand All @@ -7077,7 +7077,8 @@ def melt(
... "c": [2, 4, 6],
... }
... )
>>> df.melt(id_vars="a", value_vars=["b", "c"])
>>> import polars.selectors as cs
>>> df.melt(id_vars="a", value_vars=cs.numeric())
shape: (6, 3)
┌─────┬──────────┬───────┐
│ a ┆ variable ┆ value │
Expand All @@ -7093,14 +7094,9 @@ def melt(
└─────┴──────────┴───────┘
"""
if isinstance(value_vars, str):
value_vars = [value_vars]
if isinstance(id_vars, str):
id_vars = [id_vars]
if value_vars is None:
value_vars = []
if id_vars is None:
id_vars = []
value_vars = [] if value_vars is None else _expand_selectors(self, value_vars)
id_vars = [] if id_vars is None else _expand_selectors(self, id_vars)

return self._from_pydf(
self._df.melt(id_vars, value_vars, value_name, variable_name)
)
Expand Down
12 changes: 6 additions & 6 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5375,7 +5375,11 @@ def interpolate(self) -> Self:
"""
return self.select(F.col("*").interpolate())

def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self:
def unnest(
self,
columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector],
*more_columns: ColumnNameOrSelector,
) -> Self:
"""
Decompose struct columns into separate columns for each of their fields.
Expand Down Expand Up @@ -5423,11 +5427,7 @@ def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self:
└────────┴─────┴─────┴──────┴───────────┴───────┘
"""
if isinstance(columns, str):
columns = [columns]
if more_columns:
columns = list(columns)
columns.extend(more_columns)
columns = _expand_selectors(self, columns, *more_columns)
return self._from_pyldf(self._ldf.unnest(columns))

def merge_sorted(self, other: LazyFrame, key: str) -> Self:
Expand Down
7 changes: 5 additions & 2 deletions py-polars/tests/unit/datatypes/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,11 @@ def test_struct_unnesting() -> None:
}
)
for cols in ("foo", cs.ends_with("oo")):
out = df.unnest(cols) # type: ignore[arg-type]
assert_frame_equal(out, expected)
out_eager = df.unnest(cols) # type: ignore[arg-type]
assert_frame_equal(out_eager, expected)

out_lazy = df.lazy().unnest(cols) # type: ignore[arg-type]
assert_frame_equal(out_lazy, expected.lazy())

out = (
df_base.lazy()
Expand Down
7 changes: 5 additions & 2 deletions py-polars/tests/unit/operations/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
def test_melt() -> None:
df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
for _idv, _vv in (("A", ("B", "C")), (cs.string(), cs.integer())):
melted = df.melt(id_vars="A", value_vars=["B", "C"])
assert all(melted["value"] == [1, 3, 5, 2, 4, 6])
melted_eager = df.melt(id_vars="A", value_vars=["B", "C"])
assert all(melted_eager["value"] == [1, 3, 5, 2, 4, 6])

melted_lazy = df.lazy().melt(id_vars="A", value_vars=["B", "C"])
assert all(melted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6])

melted = df.melt(id_vars="A", value_vars="B")
assert all(melted["value"] == [1, 3, 5])
Expand Down

0 comments on commit 987afb8

Please sign in to comment.