Skip to content

Commit

Permalink
revert: Revert change to left join default coalesce behavior (#16979)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Jun 16, 2024
1 parent 4ce4c6b commit 6e57c84
Show file tree
Hide file tree
Showing 19 changed files with 40 additions and 117 deletions.
14 changes: 2 additions & 12 deletions crates/polars-lazy/src/tests/cse.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use std::collections::BTreeSet;

use polars_ops::prelude::JoinCoalesce;

use super::*;

fn cached_before_root(q: LazyFrame) {
Expand Down Expand Up @@ -200,11 +198,7 @@ fn test_cse_joins_4954() -> PolarsResult<()> {
b,
&[col("a"), col("b")],
&[col("a"), col("b")],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
JoinType::Left.into(),
);

let (mut expr_arena, mut lp_arena) = get_arenas();
Expand Down Expand Up @@ -316,11 +310,7 @@ fn test_cse_columns_projections() -> PolarsResult<()> {
right.rename(["B"], ["C"]),
[col("A"), col("C")],
[col("A"), col("C")],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
JoinType::Left.into(),
);

let out = q.collect()?;
Expand Down
14 changes: 2 additions & 12 deletions crates/polars-lazy/src/tests/optimization_checks.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use polars_ops::prelude::JoinCoalesce;

use super::*;

#[cfg(feature = "parquet")]
Expand Down Expand Up @@ -156,11 +154,7 @@ fn test_no_left_join_pass() -> PolarsResult<()> {
df2.lazy(),
[col("idx1")],
[col("idx2")],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
JoinType::Left.into(),
)
.filter(col("bar").eq(lit(5i32)))
.collect()?;
Expand Down Expand Up @@ -208,11 +202,7 @@ pub fn test_slice_pushdown_join() -> PolarsResult<()> {
q2,
[col("category")],
[col("category")],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
JoinType::Left.into(),
)
.slice(1, 3)
// this inserts a cache and blocks slice pushdown
Expand Down
24 changes: 2 additions & 22 deletions crates/polars-lazy/src/tests/predicate_queries.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use polars_ops::prelude::JoinCoalesce;

use super::*;

#[test]
Expand Down Expand Up @@ -181,16 +179,7 @@ fn test_filter_nulls_created_by_join() -> PolarsResult<()> {
let out = a
.clone()
.lazy()
.join(
b.clone(),
[col("key")],
[col("key")],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
)
.join(b.clone(), [col("key")], [col("key")], JoinType::Left.into())
.filter(col("flag").is_null())
.collect()?;
let expected = df![
Expand All @@ -202,16 +191,7 @@ fn test_filter_nulls_created_by_join() -> PolarsResult<()> {

let out = a
.lazy()
.join(
b,
[col("key")],
[col("key")],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
)
.join(b, [col("key")], [col("key")], JoinType::Left.into())
.filter(col("flag").is_null())
.with_predicate_pushdown(false)
.collect()?;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/tests/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ fn test_streaming_aggregate_join() -> PolarsResult<()> {
let q = q.clone().left_join(q, col("sugars_g"), col("sugars_g"));
let q1 = q.with_streaming(true);
let out_streaming = q1.collect()?;
assert_eq!(out_streaming.shape(), (3, 4));
assert_eq!(out_streaming.shape(), (3, 3));
Ok(())
}

Expand Down
4 changes: 2 additions & 2 deletions crates/polars-ops/src/frame/join/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ impl JoinCoalesce {
use JoinCoalesce::*;
use JoinType::*;
match join_type {
Inner => {
Left | Inner => {
matches!(self, JoinSpecific | CoalesceColumns)
},
Left | Full { .. } => {
Full { .. } => {
matches!(self, CoalesceColumns)
},
#[cfg(feature = "asof_join")]
Expand Down
1 change: 0 additions & 1 deletion crates/polars-ops/src/series/ops/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ fn replace_by_multiple(
["__POLARS_REPLACE_OLD"],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
join_nulls: true,
..Default::default()
},
Expand Down
6 changes: 1 addition & 5 deletions crates/polars-time/src/upsample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,7 @@ fn upsample_single_impl(
source,
&[index_col_name],
&[index_col_name],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
JoinArgs::new(JoinType::Left),
)
},
_ => polars_bail!(
Expand Down
11 changes: 1 addition & 10 deletions crates/polars/tests/it/chunks/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,7 @@ fn test_cast_join_14872() {
let df2 = ParquetReader::new(buf).finish().unwrap();

let out = df1
.join(
&df2,
["ints"],
["ints"],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
)
.join(&df2, ["ints"], ["ints"], JoinArgs::new(JoinType::Left))
.unwrap();

let expected = df![
Expand Down
11 changes: 3 additions & 8 deletions crates/polars/tests/it/core/joins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,7 @@ fn test_chunked_left_join() -> PolarsResult<()> {
&band_members,
["name"],
["name"],
JoinArgs {
how: JoinType::Left,
coalesce: JoinCoalesce::CoalesceColumns,
..Default::default()
},
JoinArgs::new(JoinType::Left),
)?;
let expected = df![
"name" => ["john", "paul", "keith"],
Expand Down Expand Up @@ -290,7 +286,7 @@ fn test_join_categorical() {
let out = df_a
.join(&df_b, ["b"], ["bar"], JoinType::Left.into())
.unwrap();
assert_eq!(out.shape(), (6, 6));
assert_eq!(out.shape(), (6, 5));
let correct_ham = &[
Some("let"),
None,
Expand Down Expand Up @@ -380,7 +376,7 @@ fn test_empty_df_join() -> PolarsResult<()> {
])?;

let out = df.left_join(&empty_df, ["key"], ["key"])?;
assert_eq!(out.shape(), (2, 5));
assert_eq!(out.shape(), (2, 4));

Ok(())
}
Expand All @@ -402,7 +398,6 @@ fn test_unit_df_join() -> PolarsResult<()> {
let expected = df![
"a" => [1],
"b" => [2],
"a_right" => [1],
"b_right" => [1]
]?;
assert!(out.equals(&expected));
Expand Down
2 changes: 1 addition & 1 deletion crates/polars/tests/it/lazy/predicate_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ fn test_filter_block_join() -> PolarsResult<()> {
// mean is influence by join
.filter(col("c").mean().eq(col("d")))
.collect()?;
assert_eq!(out.shape(), (1, 4));
assert_eq!(out.shape(), (1, 3));

Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/datatypes/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def test_null_obj_str_13512() -> None:
)
df2 = pl.DataFrame({"key": [2], "a": pl.Series([1], dtype=pl.Object)})

out = df1.join(df2, on="key", how="left", coalesce=True)
out = df1.join(df2, on="key", how="left")
s = str(out)
assert s == (
"shape: (1, 2)\n"
Expand Down
1 change: 0 additions & 1 deletion py-polars/tests/unit/io/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def test_hive_partitioned_predicate_pushdown_skips_correct_number_of_files(
expected = {
"a": [3, 4],
"d": [3, 4],
"a_right": [3, 4],
"d_right": [3, 4],
}
assert result.to_dict(as_series=False) == expected
Expand Down
7 changes: 2 additions & 5 deletions py-polars/tests/unit/operations/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ def test_join_chunks_alignment_4720() -> None:
df3,
on=["index1", "index2", "index3"],
how="left",
coalesce=True,
)
).to_dict(as_series=False) == {
"index1": [0, 0, 1, 1],
Expand All @@ -293,7 +292,6 @@ def test_join_chunks_alignment_4720() -> None:
df3,
on=["index3", "index1", "index2"],
how="left",
coalesce=True,
)
).to_dict(as_series=False) == {
"index1": [0, 0, 1, 1],
Expand Down Expand Up @@ -328,7 +326,7 @@ def test_jit_sort_joins() -> None:
pd_result.columns = pd.Index(["a", "b", "b_right"])

# left key sorted right is not
pl_result = dfa_pl.join(dfb_pl, on="a", how=how, coalesce=True).sort(
pl_result = dfa_pl.join(dfb_pl, on="a", how=how).sort(
["a", "b"], maintain_order=True
)

Expand All @@ -343,7 +341,7 @@ def test_jit_sort_joins() -> None:
# left key sorted right is not
pd_result = dfb.merge(dfa, on="a", how=how)
pd_result.columns = pd.Index(["a", "b", "b_right"])
pl_result = dfb_pl.join(dfa_pl, on="a", how=how, coalesce=True).sort(
pl_result = dfb_pl.join(dfa_pl, on="a", how=how).sort(
["a", "b"], maintain_order=True
)

Expand Down Expand Up @@ -591,7 +589,6 @@ def test_join_sorted_fast_paths_null() -> None:
}
assert df1.join(df2, on="x", how="left").to_dict(as_series=False) == {
"x": [0, 0, 1],
"x_right": [0, 0, None],
"y": [0, 0, None],
}
assert df1.join(df2, on="x", how="anti").to_dict(as_series=False) == {"x": [1]}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/operations/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def test_sorted_join_and_dtypes(dtype: pl.PolarsDataType) -> None:
"a": [-2, 3, 3, 10],
}

result_left = df_a.join(df_b, on="a", how="left", coalesce=True)
result_left = df_a.join(df_b, on="a", how="left")
assert result_left.to_dict(as_series=False) == {
"index": [0, 1, 2, 3, 4, 5],
"a": [-5, -2, 3, 3, 9, 10],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ def test_streaming_cat_14933() -> None:
pl.Series("l", [None, None], dtype=pl.Categorical(ordering="physical")),
]
)
result = df1.join(df2, on="a", how="left", coalesce=True)
result = df1.join(df2, on="a", how="left")
expected = {"a": [0], "l": [None]}
assert result.collect(streaming=True).to_dict(as_series=False) == expected
18 changes: 5 additions & 13 deletions py-polars/tests/unit/streaming/test_streaming_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_streaming_joins() -> None:

pl_result = (
dfa_pl.lazy()
.join(dfb_pl.lazy(), on="a", how=how, coalesce=True)
.join(dfb_pl.lazy(), on="a", how=how)
.sort(["a", "b"], maintain_order=True)
.collect(streaming=True)
)
Expand All @@ -92,7 +92,7 @@ def test_streaming_joins() -> None:

pl_result = (
dfa_pl.lazy()
.join(dfb_pl.lazy(), on=["a", "b"], how=how, coalesce=True)
.join(dfb_pl.lazy(), on=["a", "b"], how=how)
.sort(["a", "b"])
.collect(streaming=True)
)
Expand Down Expand Up @@ -172,16 +172,10 @@ def test_join_null_matches(streaming: bool) -> None:

# Left outer
expected = pl.DataFrame(
{
"idx_a": [0, 1, 2],
"a": [None, 1, 2],
"idx_b": [None, 2, 1],
"a_right": [None, 1, 2],
}
{"idx_a": [0, 1, 2], "a": [None, 1, 2], "idx_b": [None, 2, 1]}
)
assert_frame_equal(
df_a.join(df_b, on="a", how="left").collect(streaming=streaming),
expected,
df_a.join(df_b, on="a", how="left").collect(streaming=streaming), expected
)
# Full outer
expected = pl.DataFrame(
Expand Down Expand Up @@ -221,9 +215,7 @@ def test_join_null_matches_multiple_keys(streaming: bool) -> None:
{"a": [None, 1, 2], "idx": [0, 1, 2], "c": [None, 50, None]}
)
assert_frame_equal(
df_a.join(df_b, on=["a", "idx"], how="left", coalesce=True).collect(
streaming=streaming
),
df_a.join(df_b, on=["a", "idx"], how="left").collect(streaming=streaming),
expected,
)

Expand Down
21 changes: 11 additions & 10 deletions py-polars/tests/unit/test_cse.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def test_cse_rename_cross_join_5405() -> None:
right = pl.DataFrame({"A": [1, 2], "B": [3, 4], "D": [5, 6]}).lazy()
left = pl.DataFrame({"C": [3, 4]}).lazy().join(right.select("A"), how="cross")

result = left.join(
right.rename({"B": "C"}), on=["A", "C"], how="left", coalesce=True
).collect(comm_subplan_elim=True)
result = left.join(right.rename({"B": "C"}), on=["A", "C"], how="left").collect(
comm_subplan_elim=True
)

expected = pl.DataFrame(
{
Expand All @@ -52,8 +52,9 @@ def test_union_duplicates() -> None:
assert result


# https://github.com/pola-rs/polars/issues/11116
def test_cse_with_struct_expr_11116() -> None:
# https://github.com/pola-rs/polars/issues/11116

df = pl.DataFrame([{"s": {"a": 1, "b": 4}, "c": 3}]).lazy()

result = df.with_columns(
Expand Down Expand Up @@ -94,9 +95,9 @@ def test_cse_schema_6081() -> None:
pl.col("value").min().alias("min_value")
)

result = df.join(
min_value_by_group, on=["date", "id"], how="left", coalesce=True
).collect(comm_subplan_elim=True, projection_pushdown=True)
result = df.join(min_value_by_group, on=["date", "id"], how="left").collect(
comm_subplan_elim=True, projection_pushdown=True
)
expected = pl.DataFrame(
{
"date": [date(2022, 12, 12), date(2022, 12, 12), date(2022, 12, 13)],
Expand Down Expand Up @@ -128,9 +129,9 @@ def test_cse_9630() -> None:
intersected_df1 = all_subsections.join(lf1, on="key")
intersected_df2 = all_subsections.join(lf2, on="key")

result = intersected_df1.join(
intersected_df2, on=["key"], how="left", coalesce=True
).collect(comm_subplan_elim=True)
result = intersected_df1.join(intersected_df2, on=["key"], how="left").collect(
comm_subplan_elim=True
)

expected = pl.DataFrame(
{
Expand Down
Loading

0 comments on commit 6e57c84

Please sign in to comment.