From 0b8be40ccee5c5dcef43db15bd43a77d228afbca Mon Sep 17 00:00:00 2001 From: Weijie Guo Date: Fri, 20 Oct 2023 13:03:07 +0800 Subject: [PATCH] fix: fix project pushdown for double projection contains count (#11843) --- .../projection_pushdown/projection.rs | 52 ++++++++++++------- py-polars/tests/unit/test_projections.py | 6 +++ 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/projection.rs b/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/projection.rs index e49ba5cb5642..8cf418011888 100644 --- a/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/projection.rs +++ b/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/projection.rs @@ -8,6 +8,31 @@ fn is_count(node: Node, expr_arena: &Arena) -> bool { } } +/// In this function we check a double projection case +/// df +/// .select(col("foo").alias("bar")) +/// .select(col("bar") +/// +/// In this query, bar cannot pass this projection, as it would not exist in DF. +/// THE ORDER IS IMPORTANT HERE! +/// this removes projection names, so any checks to upstream names should +/// be done before this branch. +fn check_double_projection( + expr: &Node, + expr_arena: &mut Arena, + acc_projections: &mut Vec, + projected_names: &mut PlHashSet>, +) { + for (_, ae) in (&*expr_arena).iter(*expr) { + if let AExpr::Alias(_, name) = ae { + if projected_names.remove(name) { + acc_projections + .retain(|expr| !aexpr_to_leaf_names(*expr, expr_arena).contains(name)); + } + } + } +} + #[allow(clippy::too_many_arguments)] pub(super) fn process_projection( proj_pd: &mut ProjectionPushDown, @@ -29,6 +54,14 @@ pub(super) fn process_projection( // simply select the first column let (first_name, _) = input_schema.try_get_at_index(0)?; let expr = expr_arena.add(AExpr::Column(Arc::from(first_name.as_str()))); + if !acc_projections.is_empty() { + check_double_projection( + &exprs[0], + expr_arena, + &mut acc_projections, + &mut projected_names, + ); + } add_expr_to_accumulated(expr, &mut acc_projections, &mut projected_names, expr_arena); local_projection.push(exprs[0]); } else { @@ -48,24 +81,7 @@ pub(super) fn process_projection( continue; } - // in this branch we check a double projection case - // df - // .select(col("foo").alias("bar")) - // .select(col("bar") - // - // In this query, bar cannot pass this projection, as it would not exist in DF. - // THE ORDER IS IMPORTANT HERE! - // this removes projection names, so any checks to upstream names should - // be done before this branch. - for (_, ae) in (&*expr_arena).iter(*e) { - if let AExpr::Alias(_, name) = ae { - if projected_names.remove(name) { - acc_projections.retain(|expr| { - !aexpr_to_leaf_names(*expr, expr_arena).contains(name) - }); - } - } - } + check_double_projection(e, expr_arena, &mut acc_projections, &mut projected_names); } // do local as we still need the effect of the projection // e.g. a projection is more than selecting a column, it can diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py index 2e276b837b01..85dc01ce006d 100644 --- a/py-polars/tests/unit/test_projections.py +++ b/py-polars/tests/unit/test_projections.py @@ -320,3 +320,9 @@ def test_projection_rename_10595() -> None: assert lf.select("a", "b").rename({"b": "a", "a": "b"}).select( "a" ).collect().schema == {"a": pl.Float32} + + +def test_projection_count_11841() -> None: + pl.LazyFrame({"x": 1}).select(records=pl.count()).select( + pl.lit(1).alias("x"), pl.all() + ).collect()