From bb762f386b6ae8f4a92ede4a056dba3a85e93010 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 1 Aug 2024 10:35:36 +0200 Subject: [PATCH] fix: Fix Asof join by schema (#17988) --- crates/polars-plan/src/plans/schema.rs | 25 ++++++++----------- .../tests/unit/operations/test_join_asof.py | 15 +++++++++++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/crates/polars-plan/src/plans/schema.rs b/crates/polars-plan/src/plans/schema.rs index 0cfa854fb520..96e6c1b0d2c2 100644 --- a/crates/polars-plan/src/plans/schema.rs +++ b/crates/polars-plan/src/plans/schema.rs @@ -329,26 +329,23 @@ pub(crate) fn det_join_schema( join_on_right.insert(field.name); } - // Asof joins are special, if the names are equal they will not be coalesced. for (name, dtype) in schema_right.iter() { - if !join_on_right.contains(name.as_str()) || (!should_coalesce) - // The names that are joined on are merged - { - if schema_left.contains(name.as_str()) { - #[cfg(feature = "asof_join")] - if let JoinType::AsOf(asof_options) = &options.args.how { - if let (Some(left_by), Some(right_by)) = - (&asof_options.left_by, &asof_options.right_by) + if !join_on_right.contains(name.as_str()) || (!should_coalesce) { + // Asof join by columns are coalesced + #[cfg(feature = "asof_join")] + if let JoinType::AsOf(asof_options) = &options.args.how { + if let Some(right_by) = &asof_options.right_by { { - { - // Do not add suffix. The column of the left table will be used - if left_by.contains(name) && right_by.contains(name) { - continue; - } + // Do not add suffix. The column of the left table will be used + if right_by.contains(name) { + continue; } } } + } + // The names that are joined on are merged + if schema_left.contains(name.as_str()) { let new_name = format_smartstring!("{}{}", name, options.args.suffix()); new_schema.with_column(new_name, dtype.clone()); } else { diff --git a/py-polars/tests/unit/operations/test_join_asof.py b/py-polars/tests/unit/operations/test_join_asof.py index 59e6cefb8227..58b29207dac7 100644 --- a/py-polars/tests/unit/operations/test_join_asof.py +++ b/py-polars/tests/unit/operations/test_join_asof.py @@ -1180,3 +1180,18 @@ def test_join_as_of_by_schema() -> None: b = pl.DataFrame({"a": [1], "b": [2], "d": [4]}).lazy() q = a.join_asof(b, on=pl.col("a").set_sorted(), by="b") assert q.collect_schema().names() == q.collect().columns + + +def test_asof_join_by_schema() -> None: + # different `by` names. + df1 = pl.DataFrame({"on1": 0, "by1": 0}) + df2 = pl.DataFrame({"on1": 0, "by2": 0}) + + q = df1.lazy().join_asof( + df2.lazy(), + on="on1", + by_left="by1", + by_right="by2", + ) + + assert q.collect_schema() == q.collect().schema