Skip to content

Commit

Permalink
fix: Fix order observability of group-by-dyn (#20615)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jan 8, 2025
1 parent 92fd75d commit 94847c3
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 42 deletions.
10 changes: 5 additions & 5 deletions crates/polars-plan/src/plans/optimizer/set_order.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,6 @@ pub(super) fn set_order_flags(
..
} => {
debug_assert!(options.slice.is_none());
if !maintain_order_above && *maintain_order {
*maintain_order = false;
continue;
}

if apply.is_some()
|| *maintain_order
|| options.is_rolling()
Expand All @@ -136,6 +131,11 @@ pub(super) fn set_order_flags(
maintain_order_above = true;
continue;
}
if !maintain_order_above && *maintain_order {
*maintain_order = false;
continue;
}

if all_elementwise(keys, expr_arena)
&& all_order_independent(aggs, expr_arena, Context::Aggregation)
{
Expand Down
37 changes: 0 additions & 37 deletions py-polars/tests/unit/lazyframe/optimizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,6 @@
from polars.testing import assert_frame_equal


def test_remove_double_sort() -> None:
assert (
pl.LazyFrame({"a": [1, 2, 3, 3]}).sort("a").sort("a").explain().count("SORT")
== 1
)


def test_double_sort_maintain_order_18558() -> None:
df = pl.DataFrame(
{
"col1": [1, 2, 2, 4, 5, 6],
"col2": [2, 2, 0, 0, 2, None],
}
)

lf = df.lazy().sort("col2").sort("col1", maintain_order=True)

expect = pl.DataFrame(
[
pl.Series("col1", [1, 2, 2, 4, 5, 6], dtype=pl.Int64),
pl.Series("col2", [2, 0, 2, 0, 2, None], dtype=pl.Int64),
]
)

assert_frame_equal(lf.collect(), expect)


def test_fast_count_alias_18581() -> None:
f = io.BytesIO()
f.write(b"a,b,c\n1,2,3\n4,5,6")
Expand All @@ -40,13 +13,3 @@ def test_fast_count_alias_18581() -> None:
df = pl.scan_csv(f).select(pl.len().alias("weird_name")).collect()

assert_frame_equal(pl.DataFrame({"weird_name": 2}), df)


def test_order_observability() -> None:
q = pl.LazyFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).sort("a")

assert "SORT" not in q.group_by("a").sum().explain(_check_order=True)
assert "SORT" not in q.group_by("a").min().explain(_check_order=True)
assert "SORT" not in q.group_by("a").max().explain(_check_order=True)
assert "SORT" in q.group_by("a").last().explain(_check_order=True)
assert "SORT" in q.group_by("a").first().explain(_check_order=True)
53 changes: 53 additions & 0 deletions py-polars/tests/unit/lazyframe/test_order_observability.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import polars as pl
from polars.testing import assert_frame_equal


def test_order_observability() -> None:
q = pl.LazyFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).sort("a")

assert "SORT" not in q.group_by("a").sum().explain(_check_order=True)
assert "SORT" not in q.group_by("a").min().explain(_check_order=True)
assert "SORT" not in q.group_by("a").max().explain(_check_order=True)
assert "SORT" in q.group_by("a").last().explain(_check_order=True)
assert "SORT" in q.group_by("a").first().explain(_check_order=True)


def test_order_observability_group_by_dynamic() -> None:
assert (
pl.LazyFrame(
{"REGIONID": [1, 23, 4], "INTERVAL_END": [32, 43, 12], "POWER": [12, 3, 1]}
)
.sort("REGIONID", "INTERVAL_END")
.group_by_dynamic(index_column="INTERVAL_END", every="1i", group_by="REGIONID")
.agg(pl.col("POWER").sum())
.sort("POWER")
.head()
.explain()
).count("SORT") == 2


def test_remove_double_sort() -> None:
assert (
pl.LazyFrame({"a": [1, 2, 3, 3]}).sort("a").sort("a").explain().count("SORT")
== 1
)


def test_double_sort_maintain_order_18558() -> None:
df = pl.DataFrame(
{
"col1": [1, 2, 2, 4, 5, 6],
"col2": [2, 2, 0, 0, 2, None],
}
)

lf = df.lazy().sort("col2").sort("col1", maintain_order=True)

expect = pl.DataFrame(
[
pl.Series("col1", [1, 2, 2, 4, 5, 6], dtype=pl.Int64),
pl.Series("col2", [2, 0, 2, 0, 2, None], dtype=pl.Int64),
]
)

assert_frame_equal(lf.collect(), expect)

0 comments on commit 94847c3

Please sign in to comment.