Skip to content

Commit

Permalink
refactor(df-repr): adjust filter cost to prefer hash join (#265)
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
  • Loading branch information
skyzh committed Dec 18, 2024
1 parent e67776b commit 483981c
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 202 deletions.
3 changes: 2 additions & 1 deletion optd-core/src/cascades/memo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@ impl<T: NodeType> NaiveMemo<T> {
}

fn verify_integrity(&self) {
- if false {
+ const ENABLE_INTEGRITY_CHECK: bool = false;
+ if ENABLE_INTEGRITY_CHECK {
let num_of_exprs = self.expr_id_to_expr_node.len();
assert_eq!(num_of_exprs, self.expr_node_to_expr_id.len());
assert_eq!(num_of_exprs, self.expr_id_to_group_id.len());
Expand Down
5 changes: 2 additions & 3 deletions optd-datafusion-repr/src/cost/base_cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,12 @@ impl CostModel<DfNodeType, NaiveMemo<DfNodeType>> for DfCostModel {
}
DfNodeType::PhysicalLimit => {
let row_cnt = Self::row_cnt(children[0]);
- let selectivity = 0.001;
- Self::stat((row_cnt * selectivity).max(1.0))
+ Self::stat(row_cnt.max(1.0))
}
DfNodeType::PhysicalEmptyRelation => Self::stat(0.01),
DfNodeType::PhysicalFilter => {
let row_cnt = Self::row_cnt(children[0]);
- let selectivity = 0.001;
+ let selectivity = 0.01;
Self::stat((row_cnt * selectivity).max(1.0))
}
DfNodeType::PhysicalNestedLoopJoin(_) => {
Expand Down
3 changes: 1 addition & 2 deletions optd-datafusion-repr/src/memo_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ fn enumerate_join_order_expr_inner<M: Memo<DfNodeType> + ?Sized>(
.take(MAX_JOIN_ORDER_OUTPUT)
.map(|x| (*x).clone())
.collect_vec()
- .into()
}
_ => Vec::new(),
}
Expand Down Expand Up @@ -139,7 +138,7 @@ impl<M: Memo<DfNodeType>> MemoExt for M {
let mut visited = HashMap::new();
enumerate_join_order_group_inner(self, entry, &mut visited, &mut false)
.iter()
- .map(|x| x.clone())
+ .cloned()
.collect()
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ LogicalProjection { exprs: [ #0, #1 ] }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
- PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033008,io=4000}, stat: {row_cnt=1} }
- └── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4033005,io=4000}, stat: {row_cnt=1} }
+ PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033080,io=4000}, stat: {row_cnt=10} }
+ └── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4033050,io=4000}, stat: {row_cnt=10} }
└── PhysicalFilter
├── cond:Gt
│ ├── #4
│ └── 100(i64)
├── cost: {compute=4033000,io=4000}
- ├── stat: {row_cnt=1}
+ ├── stat: {row_cnt=10}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} }
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalNestedLoopJoin
Expand Down Expand Up @@ -168,14 +168,14 @@ LogicalProjection { exprs: [ #0, #1 ] }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
- PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228008,io=5000}, stat: {row_cnt=1} }
- └── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=44228005,io=5000}, stat: {row_cnt=1} }
+ PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228080,io=5000}, stat: {row_cnt=10} }
+ └── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=44228050,io=5000}, stat: {row_cnt=10} }
└── PhysicalFilter
├── cond:Gt
│ ├── #4
│ └── 100(i64)
├── cost: {compute=44228000,io=5000}
- ├── stat: {row_cnt=1}
+ ├── stat: {row_cnt=10}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=44225000,io=5000}, stat: {row_cnt=1000} }
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalNestedLoopJoin
Expand Down Expand Up @@ -359,14 +359,14 @@ LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalJoin { join_type: Inner, cond: true }
├── LogicalScan { table: t2 }
└── LogicalScan { table: t3 }
- PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036008,io=5000}, stat: {row_cnt=1} }
- └── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4036005,io=5000}, stat: {row_cnt=1} }
+ PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036080,io=5000}, stat: {row_cnt=10} }
+ └── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4036050,io=5000}, stat: {row_cnt=10} }
└── PhysicalFilter
├── cond:Gt
│ ├── #4
│ └── 100(i64)
├── cost: {compute=4036000,io=5000}
- ├── stat: {row_cnt=1}
+ ├── stat: {row_cnt=10}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4033000,io=5000}, stat: {row_cnt=1000} }
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalNestedLoopJoin
Expand Down
80 changes: 40 additions & 40 deletions optd-sqlplannertest/tests/tpch/q11.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -92,49 +92,49 @@ PhysicalSort
├── exprs:SortOrder { order: Desc }
│ └── #1
└── PhysicalProjection { exprs: [ #0, #1 ] }
└── PhysicalNestedLoopJoin
├── join_type: Inner
└── PhysicalFilter
├── cond:Gt
│ ├── Cast { cast_to: Decimal128(38, 15), child: #1 }
│ └── #2
├── PhysicalAgg
│ ├── aggrs:Agg(Sum)
│ │ └── Mul
│ │ ├── #3
│ │ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
│ ├── groups: [ #0 ]
│ └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
│ │ ├── PhysicalFilter
│ │ │ ├── cond:Eq
│ │ │ │ ├── #1
│ │ │ │ └── "CHINA"
│ │ │ └── PhysicalScan { table: nation }
│ │ └── PhysicalScan { table: supplier }
│ └── PhysicalScan { table: partsupp }
└── PhysicalProjection
├── exprs:Cast
│ ├── cast_to: Decimal128(38, 15)
│ ├── child:Mul
│ │ ├── Cast { cast_to: Float64, child: #0 }
│ │ └── 0.0001(float)
└── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
├── PhysicalAgg
│ ├── aggrs:Agg(Sum)
│ │ └── Mul
│ │ ├── #3
│ │ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
│ ├── groups: [ #0 ]
│ └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
│ │ ├── PhysicalFilter
│ │ │ ├── cond:Eq
│ │ │ │ ├── #1
│ │ │ │ └── "CHINA"
│ │ │ └── PhysicalScan { table: nation }
│ │ └── PhysicalScan { table: supplier }
│ └── PhysicalScan { table: partsupp }
└── PhysicalProjection
├── exprs:Cast
│ ├── cast_to: Decimal128(38, 15)
│ ├── child:Mul
│ │ ├── Cast { cast_to: Float64, child: #0 }
│ │ └── 0.0001(float)
└── PhysicalAgg
├── aggrs:Agg(Sum)
│ └── Mul
│ ├── #3
│ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
├── groups: []
└── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
│ ├── PhysicalFilter
│ │ ├── cond:Eq
│ │ │ ├── #1
│ │ │ └── "CHINA"
│ │ └── PhysicalScan { table: nation }
│ └── PhysicalScan { table: supplier }
└── PhysicalScan { table: partsupp }
└── PhysicalAgg
├── aggrs:Agg(Sum)
│ └── Mul
│ ├── #3
│ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
├── groups: []
└── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
│ ├── PhysicalFilter
│ │ ├── cond:Eq
│ │ │ ├── #1
│ │ │ └── "CHINA"
│ │ └── PhysicalScan { table: nation }
│ └── PhysicalScan { table: supplier }
└── PhysicalScan { table: partsupp }
*/

23 changes: 12 additions & 11 deletions optd-sqlplannertest/tests/tpch/q2.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -243,17 +243,18 @@ PhysicalLimit { skip: 0(i64), fetch: 100(i64) }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #0 ], right_keys: [ #1, #0 ] }
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #23 ], right_keys: [ #0 ] }
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] }
│ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #9 ], right_keys: [ #0, #1 ] }
│ │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
│ │ │ │ ├── PhysicalFilter
│ │ │ │ │ ├── cond:And
│ │ │ │ │ │ ├── Eq
│ │ │ │ │ │ │ ├── Cast { cast_to: Int64, child: #5 }
│ │ │ │ │ │ │ └── 4(i64)
│ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false }
│ │ │ │ │ └── PhysicalScan { table: part }
│ │ │ │ └── PhysicalScan { table: supplier }
│ │ │ └── PhysicalScan { table: partsupp }
│ │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #14, #15, #16, #17, #18, #19, #20, #9, #10, #11, #12, #13 ] }
│ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #10 ], right_keys: [ #0 ] }
│ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
│ │ │ │ ├── PhysicalFilter
│ │ │ │ │ ├── cond:And
│ │ │ │ │ │ ├── Eq
│ │ │ │ │ │ │ ├── Cast { cast_to: Int64, child: #5 }
│ │ │ │ │ │ │ └── 4(i64)
│ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false }
│ │ │ │ │ └── PhysicalScan { table: part }
│ │ │ │ └── PhysicalScan { table: partsupp }
│ │ │ └── PhysicalScan { table: supplier }
│ │ └── PhysicalScan { table: nation }
│ └── PhysicalFilter
│ ├── cond:Eq
Expand Down
Loading

0 comments on commit 483981c

Please sign in to comment.