From 5aa9209097dd5e4cd8e2905f6e2c78024dd0b201 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Sat, 14 Dec 2024 15:00:40 -0500 Subject: [PATCH] refactor(df-repr): remove cross join, use inner join (#265) Signed-off-by: Alex Chi Z --- optd-datafusion-bridge/src/from_optd.rs | 5 ++-- optd-datafusion-bridge/src/into_optd.rs | 2 +- .../src/adv_stats/join.rs | 7 ----- optd-datafusion-repr/src/plan_nodes/join.rs | 1 - .../src/properties/column_ref.rs | 2 +- optd-datafusion-repr/src/properties/schema.rs | 2 +- .../src/rules/filter_pushdown.rs | 26 ------------------- optd-datafusion-repr/src/rules/joins.rs | 2 +- .../src/rules/subquery/depjoin_pushdown.rs | 6 ++--- 9 files changed, 10 insertions(+), 43 deletions(-) diff --git a/optd-datafusion-bridge/src/from_optd.rs b/optd-datafusion-bridge/src/from_optd.rs index 6c4b47dd..263976bd 100644 --- a/optd-datafusion-bridge/src/from_optd.rs +++ b/optd-datafusion-bridge/src/from_optd.rs @@ -476,7 +476,9 @@ impl OptdPlanContext<'_> { let physical_expr = self.conv_from_optd_expr(node.cond(), &Arc::new(filter_schema.clone()))?; - if *node.join_type() == JoinType::Cross { + if node.join_type() == &JoinType::Inner + && node.cond() == ConstantPred::bool(true).into_pred_node() + { return Ok(Arc::new(CrossJoinExec::new(left_exec, right_exec)) as Arc); } @@ -491,7 +493,6 @@ impl OptdPlanContext<'_> { JoinType::LeftAnti => datafusion_expr::JoinType::LeftAnti, JoinType::RightAnti => datafusion_expr::JoinType::RightAnti, JoinType::LeftMark => datafusion_expr::JoinType::LeftMark, - _ => unimplemented!(), }; let mut column_idxs = vec![]; diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index 0b8537a1..1c1c9498 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -547,7 +547,7 @@ impl OptdPlanContext<'_> { left, right, ConstantPred::bool(true).into_pred_node(), - JoinType::Cross, + JoinType::Inner, )) } else if log_ops.len() == 1 { Ok(LogicalJoin::new(left, right, log_ops.remove(0), join_type)) diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs index 0f0053e0..f50c7025 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs @@ -191,13 +191,6 @@ impl< JoinType::Inner => inner_join_selectivity, JoinType::LeftOuter => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), JoinType::RightOuter => f64::max(inner_join_selectivity, 1.0 / left_row_cnt), - JoinType::Cross => { - assert!( - on_col_ref_pairs.is_empty(), - "Cross joins should not have on columns" - ); - join_filter_selectivity - } // TODO: Does this make sense? JoinType::LeftMark => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), _ => unimplemented!("join_typ={} is not implemented", join_typ), diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index d506449f..ede3167d 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -15,7 +15,6 @@ pub enum JoinType { FullOuter, LeftOuter, RightOuter, - Cross, LeftSemi, RightSemi, LeftAnti, diff --git a/optd-datafusion-repr/src/properties/column_ref.rs b/optd-datafusion-repr/src/properties/column_ref.rs index 7846f2b7..594ead78 100644 --- a/optd-datafusion-repr/src/properties/column_ref.rs +++ b/optd-datafusion-repr/src/properties/column_ref.rs @@ -471,7 +471,7 @@ impl LogicalPropertyBuilder for ColumnRefPropertyBuilder { // // Otherwise be conservative and discard all correlations. let output_correlation = match join_type { - JoinType::Inner | JoinType::Cross => { + JoinType::Inner => { // Merge the equal columns in the join condition into the those from the // children. if let Some(SemanticCorrelation { diff --git a/optd-datafusion-repr/src/properties/schema.rs b/optd-datafusion-repr/src/properties/schema.rs index de9d2261..7ec4e432 100644 --- a/optd-datafusion-repr/src/properties/schema.rs +++ b/optd-datafusion-repr/src/properties/schema.rs @@ -180,7 +180,7 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { DfNodeType::Join(join_type) => { use crate::plan_nodes::JoinType::*; match join_type { - Inner | LeftOuter | RightOuter | FullOuter | Cross => { + Inner | LeftOuter | RightOuter | FullOuter => { let mut schema = children[0].clone(); let schema2 = children[1].clone(); schema.fields.extend(schema2.fields); diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index 439bd28a..dd46734b 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -160,20 +160,6 @@ fn apply_filter_merge( vec![new_filter.into_plan_node().into()] } -// TODO: define_rule! should be able to match on any join type, ideally... -define_rule!( - FilterCrossJoinTransposeRule, - apply_filter_cross_join_transpose, - (Filter, (Join(JoinType::Cross), child_a, child_b)) -); - -fn apply_filter_cross_join_transpose( - optimizer: &impl Optimizer, - binding: ArcDfPlanNode, -) -> Vec> { - filter_join_transpose(optimizer, binding) -} - define_rule!( FilterInnerJoinTransposeRule, apply_filter_inner_join_transpose, @@ -256,18 +242,6 @@ fn filter_join_transpose( let new_conds = merge_conds(and_expr_list_to_expr(join_conds), old_cond); LogicalJoin::new_unchecked(new_left, new_right, new_conds, JoinType::Inner) } - JoinType::Cross => { - if !join_conds.is_empty() { - LogicalJoin::new_unchecked( - new_left, - new_right, - and_expr_list_to_expr(join_conds), - JoinType::Inner, - ) - } else { - LogicalJoin::new_unchecked(new_left, new_right, join_cond, JoinType::Cross) - } - } _ => { // We don't support modifying the join condition for other join types yet LogicalJoin::new_unchecked(new_left, new_right, join_cond, *join_typ) diff --git a/optd-datafusion-repr/src/rules/joins.rs b/optd-datafusion-repr/src/rules/joins.rs index 5a7fdbc3..ebc804ec 100644 --- a/optd-datafusion-repr/src/rules/joins.rs +++ b/optd-datafusion-repr/src/rules/joins.rs @@ -81,7 +81,7 @@ fn apply_eliminate_join( left, right, ConstantPred::bool(true).into_pred_node(), - JoinType::Cross, + JoinType::Inner, ); return vec![node.into_plan_node().into()]; } else { diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 67a7164f..8f08be19 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -86,7 +86,7 @@ fn apply_dep_initial_distinct( left, right, ConstantPred::bool(true).into_pred_node(), - JoinType::Cross, + JoinType::Inner, ) .into_plan_node(), SubqueryType::Exists => { @@ -122,7 +122,7 @@ fn apply_dep_initial_distinct( left, count_star_to_bool_proj, ConstantPred::bool(true).into_pred_node(), - JoinType::Cross, + JoinType::Inner, ) .into_plan_node() } @@ -147,7 +147,7 @@ fn apply_dep_initial_distinct( left, right, ConstantPred::bool(true).into_pred_node(), - JoinType::Cross, + JoinType::Inner, ); return vec![new_join.into_plan_node().into()];