From e67776b23760a24c410ab8bd8f314381e04f7b6a Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Tue, 17 Dec 2024 21:28:45 -0500 Subject: [PATCH] refactor(core): rewrite tasks using generators, better pruning (#265) Signed-off-by: Alex Chi Z --- Cargo.lock | 110 +++- optd-core/Cargo.toml | 2 + optd-core/src/cascades.rs | 4 +- optd-core/src/cascades/memo.rs | 5 + optd-core/src/cascades/optimizer.rs | 200 ++++--- .../{tasks/apply_rule.rs => rule_match.rs} | 97 +-- optd-core/src/cascades/tasks.rs | 28 - optd-core/src/cascades/tasks/explore_group.rs | 59 -- .../src/cascades/tasks/optimize_expression.rs | 89 --- .../src/cascades/tasks/optimize_group.rs | 64 -- .../src/cascades/tasks/optimize_inputs.rs | 302 ---------- optd-core/src/cascades/tasks2.rs | 550 ++++++++++++++++++ optd-datafusion-bridge/src/lib.rs | 1 + optd-datafusion-repr-adv-cost/Cargo.toml | 1 - optd-datafusion-repr/Cargo.toml | 1 - optd-datafusion-repr/src/lib.rs | 55 +- optd-datafusion-repr/src/memo_ext.rs | 54 +- optd-sqlplannertest/Cargo.toml | 3 +- .../src/bin/planner_test_apply.rs | 13 +- .../tests/basic/cross_product.planner.sql | 4 +- .../tests/basic/filter.planner.sql | 16 +- .../tests/joins/join_enumerate.planner.sql | 66 ++- .../tests/joins/join_enumerate.yml | 4 +- .../tests/joins/multi-join.planner.sql | 38 +- .../tests/joins/self-join.planner.sql | 2 +- .../pushdowns/fliter_transpose.planner.sql | 12 +- .../subqueries/subquery_unnesting.planner.sql | 266 ++++----- .../tests/tpch/q10.planner.sql | 46 +- .../tests/tpch/q11.planner.sql | 90 +-- .../tests/tpch/q12.planner.sql | 2 +- .../tests/tpch/q14.planner.sql | 2 +- .../tests/tpch/q15.planner.sql | 49 +- .../tests/tpch/q16.planner.sql | 4 +- .../tests/tpch/q17.planner.sql | 104 ++-- .../tests/tpch/q19.planner.sql | 4 +- optd-sqlplannertest/tests/tpch/q2.planner.sql | 166 +++--- .../tests/tpch/q20.planner.sql | 160 ++--- .../tests/tpch/q22.planner.sql | 70 +-- optd-sqlplannertest/tests/tpch/q3.planner.sql | 38 +- optd-sqlplannertest/tests/tpch/q5.planner.sql | 38 +- optd-sqlplannertest/tests/tpch/q7.planner.sql | 24 +- optd-sqlplannertest/tests/tpch/q8.planner.sql | 48 +- optd-sqlplannertest/tests/tpch/q9.planner.sql | 12 +- 43 files changed, 1510 insertions(+), 1393 deletions(-) rename optd-core/src/cascades/{tasks/apply_rule.rs => rule_match.rs} (51%) delete mode 100644 optd-core/src/cascades/tasks.rs delete mode 100644 optd-core/src/cascades/tasks/explore_group.rs delete mode 100644 optd-core/src/cascades/tasks/optimize_expression.rs delete mode 100644 optd-core/src/cascades/tasks/optimize_group.rs delete mode 100644 optd-core/src/cascades/tasks/optimize_inputs.rs create mode 100644 optd-core/src/cascades/tasks2.rs diff --git a/Cargo.lock b/Cargo.lock index f828d1af..00cc81ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -403,7 +403,7 @@ dependencies = [ "memchr", "num", "regex", - "regex-syntax", + "regex-syntax 0.8.5", ] [[package]] @@ -829,6 +829,17 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "backtrace-on-stack-overflow" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fd2d70527f3737a1ad17355e260706c1badebabd1fa06a7a053407380df841b" +dependencies = [ + "backtrace", + "libc", + "nix 0.23.2", +] + [[package]] name = "base64" version = "0.21.7" @@ -931,7 +942,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a68f1f47cdf0ec8ee4b941b2eee2a80cb796db73118c0dd09ac63fbe405be22" dependencies = [ "memchr", - "regex-automata", + "regex-automata 0.4.9", "serde", ] @@ -1781,7 +1792,7 @@ dependencies = [ "itertools 0.13.0", "log", "paste", - "regex-syntax", + "regex-syntax 0.8.5", ] [[package]] @@ -3102,6 +3113,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "md-5" version = "0.10.6" @@ -3118,6 +3138,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "mimalloc" version = "0.1.43" @@ -3168,6 +3197,19 @@ dependencies = [ "smallvec", ] +[[package]] +name = "nix" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f3790c00a0150112de0f4cd161e3d7fc4b2d8a5542ffc35f099a2562aecb35c" +dependencies = [ + "bitflags 1.3.2", + "cc", + "cfg-if", + "libc", + "memoffset", +] + [[package]] name = "nix" version = "0.28.0" @@ -3368,8 +3410,10 @@ dependencies = [ "erased-serde", "itertools 0.13.0", "ordered-float 4.5.0", + "pollster", "pretty_assertions", "serde", + "stacker", "tracing", ] @@ -3406,7 +3450,6 @@ dependencies = [ "pretty-xmlish", "serde", "tracing", - "tracing-subscriber", ] [[package]] @@ -3427,7 +3470,6 @@ dependencies = [ "serde_with", "test-case", "tracing", - "tracing-subscriber", ] [[package]] @@ -3504,11 +3546,11 @@ version = "0.1.1" dependencies = [ "anyhow", "async-trait", + "backtrace-on-stack-overflow", "clap", "criterion", "datafusion", "datafusion-optd-cli", - "env_logger 0.9.3", "itertools 0.13.0", "lazy_static", "mimalloc", @@ -3519,6 +3561,7 @@ dependencies = [ "serde_yaml", "sqlplannertest", "tokio", + "tracing-subscriber", ] [[package]] @@ -3743,6 +3786,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "pollster" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f3a9f18d041e6d0e102a0a46750538147e5e8992d3b4873aaafee2520b00ce3" + [[package]] name = "postgres-protocol" version = "0.6.7" @@ -3865,6 +3914,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "quad-rand" version = "0.2.3" @@ -4030,8 +4088,17 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -4042,7 +4109,7 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.5", ] [[package]] @@ -4051,6 +4118,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -4311,7 +4384,7 @@ dependencies = [ "libc", "log", "memchr", - "nix", + "nix 0.28.0", "radix_trie", "unicode-segmentation", "unicode-width 0.1.14", @@ -4669,6 +4742,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -5157,10 +5243,14 @@ version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ + "matchers", "nu-ansi-term", + "once_cell", + "regex", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", "tracing-log", ] diff --git a/optd-core/Cargo.toml b/optd-core/Cargo.toml index 5fc14c90..03ce387d 100644 --- a/optd-core/Cargo.toml +++ b/optd-core/Cargo.toml @@ -19,6 +19,8 @@ serde = { version = "1.0", features = ["derive", "rc"] } arrow-schema = "53.3.0" chrono = "0.4" erased-serde = "0.4" +pollster = "0.4" +stacker = "0.1" [dev-dependencies] pretty_assertions = "1.4.1" diff --git a/optd-core/src/cascades.rs b/optd-core/src/cascades.rs index 2325f00b..e43b48ca 100644 --- a/optd-core/src/cascades.rs +++ b/optd-core/src/cascades.rs @@ -7,8 +7,8 @@ mod memo; mod optimizer; -mod tasks; +pub mod rule_match; +mod tasks2; pub use memo::{Memo, NaiveMemo}; pub use optimizer::{CascadesOptimizer, ExprId, GroupId, OptimizerProperties, RelNodeContext}; -use tasks::Task; diff --git a/optd-core/src/cascades/memo.rs b/optd-core/src/cascades/memo.rs index 743448e0..11093926 100644 --- a/optd-core/src/cascades/memo.rs +++ b/optd-core/src/cascades/memo.rs @@ -158,6 +158,11 @@ pub trait Memo: 'static + Send + Sync { ) -> Result> { get_best_group_binding_inner(self, group_id, &mut post_process) } + + /// Get winner of a group and a subgroup. + fn get_group_winner(&self, group_id: GroupId) -> &Winner { + &self.get_group(group_id).info.winner + } } fn get_best_group_binding_inner + ?Sized, T: NodeType>( diff --git a/optd-core/src/cascades/optimizer.rs b/optd-core/src/cascades/optimizer.rs index 55093292..7d2abfdc 100644 --- a/optd-core/src/cascades/optimizer.rs +++ b/optd-core/src/cascades/optimizer.rs @@ -3,17 +3,19 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. -use std::collections::{BTreeSet, HashMap, HashSet, VecDeque}; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::Display; +use std::future::Future; +use std::pin::Pin; use std::sync::Arc; use anyhow::Result; use tracing::trace; use super::memo::{ArcMemoPlanNode, GroupInfo, Memo}; -use super::tasks::OptimizeGroupTask; -use super::{NaiveMemo, Task}; +use super::NaiveMemo; use crate::cascades::memo::Winner; +use crate::cascades::tasks2::{TaskContext, TaskDesc}; use crate::cost::CostModel; use crate::logical_property::{LogicalPropertyBuilder, LogicalPropertyBuilderAny}; use crate::nodes::{ @@ -27,8 +29,10 @@ pub type RuleId = usize; #[derive(Default, Clone, Debug)] pub struct OptimizerContext { - pub budget_used_logical: bool, - pub budget_used_all: bool, + /// Not apply logical rules any more + pub logical_budget_used: bool, + /// Not apply all rules any more; get a physical plan ASAP + pub all_budget_used: bool, pub rules_applied: usize, } @@ -47,19 +51,23 @@ pub struct OptimizerProperties { pub struct CascadesStats { pub rule_match_count: HashMap, pub rule_total_bindings: HashMap, + pub explore_group_count: usize, + pub optimize_group_count: usize, + pub optimize_expr_count: usize, + pub apply_rule_count: usize, + pub optimize_input_count: usize, } pub struct CascadesOptimizer = NaiveMemo> { memo: M, - pub(super) tasks: VecDeque>>, explored_group: HashSet, - explored_expr: HashSet, + explored_expr: HashSet, fired_rules: HashMap>, rules: Arc<[Arc>]>, pub stats: CascadesStats, disabled_rules: HashSet, cost: Arc>, - property_builders: Arc<[Box>]>, + logical_property_builders: Arc<[Box>]>, pub ctx: OptimizerContext, pub prop: OptimizerProperties, } @@ -105,39 +113,36 @@ impl CascadesOptimizer> { pub fn new( rules: Vec>>, cost: Box>>, - property_builders: Vec>>, + logical_property_builders: Arc<[Box>]>, ) -> Self { - Self::new_with_prop(rules, cost, property_builders, Default::default()) + Self::new_with_options(rules, cost, logical_property_builders, Default::default()) } - pub fn new_with_prop( + pub fn new_with_options( rules: Vec>>, cost: Box>>, - property_builders: Vec>>, + logical_property_builders: Arc<[Box>]>, prop: OptimizerProperties, ) -> Self { - let tasks = VecDeque::new(); - let property_builders: Arc<[_]> = property_builders.into(); - let memo = NaiveMemo::new(property_builders.clone()); + let memo = NaiveMemo::new(logical_property_builders.clone()); Self { memo, - tasks, explored_group: HashSet::new(), explored_expr: HashSet::new(), fired_rules: HashMap::new(), rules: rules.into(), cost: cost.into(), ctx: OptimizerContext::default(), - property_builders, + logical_property_builders, prop, - disabled_rules: HashSet::new(), stats: CascadesStats::default(), + disabled_rules: HashSet::new(), } } /// Clear the memo table and all optimizer states. pub fn step_clear(&mut self) { - self.memo = NaiveMemo::new(self.property_builders.clone()); + self.memo = NaiveMemo::new(self.logical_property_builders.clone()); self.fired_rules.clear(); self.explored_group.clear(); self.explored_expr.clear(); @@ -146,6 +151,13 @@ impl CascadesOptimizer> { /// Clear the winner so that the optimizer can continue to explore the group. pub fn step_clear_winner(&mut self) { self.memo.clear_winner(); + self.explored_group.clear(); + self.explored_expr.clear(); + } + + /// Clear the explored groups so that the optimizer can continue to apply the rules. + pub fn step_next_stage(&mut self) { + self.explored_group.clear(); self.explored_expr.clear(); } } @@ -175,6 +187,32 @@ impl> CascadesOptimizer { self.disabled_rules.remove(&rule_id); } + pub fn disable_rule_by_name(&mut self, rule_name: &str) { + let mut modified = false; + for (id, rule) in self.rules.iter().enumerate() { + if rule.name() == rule_name { + self.disabled_rules.insert(id); + modified = true; + } + } + if !modified { + panic!("rule {} not found", rule_name); + } + } + + pub fn enable_rule_by_name(&mut self, rule_name: &str) { + let mut modified = false; + for (id, rule) in self.rules.iter().enumerate() { + if rule.name() == rule_name { + self.disabled_rules.remove(&id); + modified = true; + } + } + if !modified { + panic!("rule {} not found", rule_name); + } + } + pub fn is_rule_disabled(&self, rule_id: usize) -> bool { self.disabled_rules.contains(&rule_id) } @@ -198,7 +236,7 @@ impl> CascadesOptimizer { }; println!("group_id={} {}", group_id, winner_str); let group = self.memo.get_group(group_id); - for (id, property) in self.property_builders.iter().enumerate() { + for (id, property) in self.logical_property_builders.iter().enumerate() { println!( " {}={}", property.property_name(), @@ -218,9 +256,9 @@ impl> CascadesOptimizer { } } } - /// Optimize a `RelNode`. pub fn step_optimize_rel(&mut self, root_rel: ArcPlanNode) -> Result { + trace!(event = "step_optimize_rel", rel = %root_rel); let (group_id, _) = self.add_new_expr(root_rel); self.fire_optimize_tasks(group_id)?; Ok(group_id) @@ -254,74 +292,21 @@ impl> CascadesOptimizer { res } - fn fire_optimize_tasks(&mut self, group_id: GroupId) -> Result<()> { + pub fn fire_optimize_tasks(&mut self, group_id: GroupId) -> Result<()> { + use pollster::FutureExt as _; trace!(event = "fire_optimize_tasks", root_group_id = %group_id); - self.tasks - .push_back(Box::new(OptimizeGroupTask::new(group_id, None))); - // get the task from the stack - self.ctx.budget_used_logical = false; - self.ctx.budget_used_all = false; - let plan_space_begin = self.memo.estimated_plan_space(); - let mut iter = 0; - while let Some(task) = self.tasks.pop_back() { - let new_tasks = task.execute(self)?; - self.tasks.extend(new_tasks); - iter += 1; - if !self.ctx.budget_used_logical { - let plan_space = self.memo.estimated_plan_space(); - if let Some(partial_explore_space) = self.prop.partial_explore_space { - if plan_space - plan_space_begin > partial_explore_space { - println!( - "plan space size budget used, not applying logical rules any more. current plan space: {}", - plan_space - ); - self.ctx.budget_used_logical = true; - if self.prop.panic_on_budget { - panic!("plan space size budget used"); - } - } - } - } - if !self.ctx.budget_used_all { - if let Some(partial_explore_iter) = self.prop.partial_explore_iter { - if iter >= partial_explore_iter { - println!( - "plan explore iter budget used, not applying physical/logical rules any more if there's no winner. current iter: {}", - iter - ); - self.ctx.budget_used_all = true; - if self.prop.panic_on_budget { - panic!("plan space size budget used"); - } - } - } - } - if iter > 100000 && iter % 10000 == 0 { - println!("iter={}", iter); - println!("plan_space={}", self.memo.estimated_plan_space()); - for (id, rule) in self.rules.iter().enumerate() { - println!( - "{}: matched={}, bindings={}", - rule.name(), - self.stats - .rule_match_count - .get(&id) - .copied() - .unwrap_or_default(), - self.stats - .rule_total_bindings - .get(&id) - .copied() - .unwrap_or_default() - ); - } - } - } + let mut task = TaskContext::new(self); + // 32MB stack for the optimization process, TODO: reduce memory footprint + stacker::maybe_grow(32 * 1024 * 1024, 32 * 1024 * 1024, || { + let fut: Pin>> = Box::pin(task.fire_optimize(group_id)); + fut.block_on(); + }); Ok(()) } fn optimize_inner(&mut self, root_rel: ArcPlanNode) -> Result> { let (group_id, _) = self.add_new_expr(root_rel); + self.fire_optimize_tasks(group_id)?; self.memo.get_best_group_binding(group_id, |_, _, _| {}) } @@ -346,12 +331,12 @@ impl> CascadesOptimizer { self.memo.add_expr_to_group(rel_node, group_id) } - pub(super) fn get_group_info(&self, group_id: GroupId) -> &GroupInfo { - self.memo.get_group_info(group_id) + pub(super) fn get_group_winner(&self, group_id: GroupId) -> &Winner { + self.memo.get_group_winner(group_id) } - pub(super) fn update_group_info(&mut self, group_id: GroupId, group_info: GroupInfo) { - self.memo.update_group_info(group_id, group_info) + pub(super) fn update_group_winner(&mut self, group_id: GroupId, winner: Winner) { + self.memo.update_group_info(group_id, GroupInfo { winner }); } /// Get the properties of a Cascades group @@ -370,10 +355,6 @@ impl> CascadesOptimizer { .clone() } - pub(super) fn get_group_id(&self, expr_id: ExprId) -> GroupId { - self.memo.get_group_id(expr_id) - } - pub(super) fn get_expr_memoed(&self, expr_id: ExprId) -> ArcMemoPlanNode { self.memo.get_expr_memoed(expr_id) } @@ -390,16 +371,16 @@ impl> CascadesOptimizer { self.explored_group.insert(group_id); } - pub(super) fn is_expr_explored(&self, expr_id: ExprId) -> bool { - self.explored_expr.contains(&expr_id) + pub(super) fn has_task_started(&self, task_desc: &TaskDesc) -> bool { + self.explored_expr.contains(task_desc) } - pub(super) fn mark_expr_explored(&mut self, expr_id: ExprId) { - self.explored_expr.insert(expr_id); + pub(super) fn mark_task_start(&mut self, task_desc: &TaskDesc) { + self.explored_expr.insert(task_desc.clone()); } - pub(super) fn unmark_expr_explored(&mut self, expr_id: ExprId) { - self.explored_expr.remove(&expr_id); + pub(super) fn mark_task_end(&mut self, task_desc: &TaskDesc) { + self.explored_expr.remove(task_desc); } pub(super) fn is_rule_fired(&self, group_expr_id: ExprId, rule_id: RuleId) -> bool { @@ -419,6 +400,31 @@ impl> CascadesOptimizer { pub fn memo(&self) -> &M { &self.memo } + + pub fn dump_stats(&self) { + println!("plan_space={}", self.memo.estimated_plan_space()); + for (id, rule) in self.rules.iter().enumerate() { + println!( + "{}: matched={}, bindings={}", + rule.name(), + self.stats + .rule_match_count + .get(&id) + .copied() + .unwrap_or_default(), + self.stats + .rule_total_bindings + .get(&id) + .copied() + .unwrap_or_default() + ); + } + println!("explore_group_count={}", self.stats.explore_group_count); + println!("optimize_group_count={}", self.stats.optimize_group_count); + println!("optimize_expr_count={}", self.stats.optimize_expr_count); + println!("apply_rule_count={}", self.stats.apply_rule_count); + println!("optimize_input_count={}", self.stats.optimize_input_count); + } } impl> Optimizer for CascadesOptimizer { diff --git a/optd-core/src/cascades/tasks/apply_rule.rs b/optd-core/src/cascades/rule_match.rs similarity index 51% rename from optd-core/src/cascades/tasks/apply_rule.rs rename to optd-core/src/cascades/rule_match.rs index f1edd791..215a9433 100644 --- a/optd-core/src/cascades/tasks/apply_rule.rs +++ b/optd-core/src/cascades/rule_match.rs @@ -5,44 +5,14 @@ use std::sync::Arc; -use anyhow::Result; use itertools::Itertools; -use tracing::trace; -use super::Task; use crate::cascades::memo::ArcMemoPlanNode; -use crate::cascades::optimizer::{CascadesOptimizer, ExprId, RuleId}; -use crate::cascades::tasks::{OptimizeExpressionTask, OptimizeInputsTask}; +use crate::cascades::optimizer::{CascadesOptimizer, ExprId}; use crate::cascades::{GroupId, Memo}; use crate::nodes::{ArcPlanNode, NodeType, PlanNode, PlanNodeOrGroup}; use crate::rules::RuleMatcher; -pub struct ApplyRuleTask { - rule_id: RuleId, - expr_id: ExprId, - exploring: bool, - upper_bound: Option, -} - -impl ApplyRuleTask { - pub fn new( - rule_id: RuleId, - expr_id: ExprId, - exploring: bool, - upper_bound: Option, - ) -> Self { - Self { - rule_id, - expr_id, - exploring, - upper_bound, - } - } -} - -// Pick/match logic, to get pieces of info to pass to the rule apply function -// TODO: I would like to see this moved elsewhere - fn match_node>( children: &[RuleMatcher], node: ArcMemoPlanNode, @@ -138,7 +108,7 @@ fn match_and_pick>( } } -fn match_and_pick_expr>( +pub(crate) fn match_and_pick_expr>( matcher: &RuleMatcher, expr_id: ExprId, optimizer: &CascadesOptimizer, @@ -159,66 +129,3 @@ fn match_and_pick_group>( } matches } - -impl> Task for ApplyRuleTask { - fn execute(&self, optimizer: &mut CascadesOptimizer) -> Result>>> { - if optimizer.is_rule_fired(self.expr_id, self.rule_id) { - return Ok(vec![]); - } - - if optimizer.is_rule_disabled(self.rule_id) { - optimizer.mark_rule_fired(self.expr_id, self.rule_id); - return Ok(vec![]); - } - - let rule = optimizer.rules()[self.rule_id].clone(); - - trace!(event = "task_begin", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, rule = %rule.name()); - let group_id = optimizer.get_group_id(self.expr_id); - let mut tasks = vec![]; - let binding_exprs = match_and_pick_expr(rule.matcher(), self.expr_id, optimizer); - if !binding_exprs.is_empty() { - *optimizer.stats.rule_match_count.entry(self.rule_id).or_default() += 1; - } - for binding in binding_exprs { - *optimizer.stats.rule_total_bindings.entry(self.rule_id).or_default() += 1; - trace!(event = "before_apply_rule", task = "apply_rule", input_binding=%binding); - let applied = rule.apply(optimizer, binding); - - for expr in applied { - trace!(event = "after_apply_rule", task = "apply_rule", output_binding=%expr); - // TODO: remove clone in the below line - if let Some(expr_id) = optimizer.add_expr_to_group(expr.clone(), group_id) { - let typ = expr.unwrap_typ(); - if typ.is_logical() { - tasks.push( - Box::new(OptimizeExpressionTask::new(expr_id, self.exploring, self.upper_bound)) - as Box>, - ); - } else { - tasks.push(Box::new(OptimizeInputsTask::new( - expr_id, - !optimizer.prop.disable_pruning, - self.upper_bound - )) as Box>); - } - optimizer.unmark_expr_explored(expr_id); - trace!(event = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, new_expr_id = %expr_id); - } else { - trace!(event = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, "triggered group merge"); - } - } - } - optimizer.mark_rule_fired(self.expr_id, self.rule_id); - - trace!(event = "task_end", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id); - Ok(tasks) - } - - fn describe(&self) -> String { - format!( - "apply_rule {{ rule_id: {}, expr_id: {}, exploring: {} }}", - self.rule_id, self.expr_id, self.exploring - ) - } -} diff --git a/optd-core/src/cascades/tasks.rs b/optd-core/src/cascades/tasks.rs deleted file mode 100644 index 610c24e8..00000000 --- a/optd-core/src/cascades/tasks.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2023-2024 CMU Database Group -// -// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at -// https://opensource.org/licenses/MIT. - -use anyhow::Result; - -use super::{CascadesOptimizer, Memo}; -use crate::nodes::NodeType; - -mod apply_rule; -mod explore_group; -mod optimize_expression; -mod optimize_group; -mod optimize_inputs; - -pub use apply_rule::ApplyRuleTask; -pub use explore_group::ExploreGroupTask; -pub use optimize_expression::OptimizeExpressionTask; -pub use optimize_group::OptimizeGroupTask; -pub use optimize_inputs::OptimizeInputsTask; - -pub trait Task>: 'static + Send + Sync { - fn execute(&self, optimizer: &mut CascadesOptimizer) -> Result>>>; - - #[allow(dead_code)] - fn describe(&self) -> String; -} diff --git a/optd-core/src/cascades/tasks/explore_group.rs b/optd-core/src/cascades/tasks/explore_group.rs deleted file mode 100644 index 98ce9a1b..00000000 --- a/optd-core/src/cascades/tasks/explore_group.rs +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2023-2024 CMU Database Group -// -// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at -// https://opensource.org/licenses/MIT. - -use anyhow::Result; -use tracing::trace; - -use super::Task; -use crate::cascades::optimizer::{CascadesOptimizer, GroupId}; -use crate::cascades::tasks::OptimizeExpressionTask; -use crate::cascades::Memo; -use crate::nodes::NodeType; - -pub struct ExploreGroupTask { - group_id: GroupId, - upper_bound: Option, -} - -impl ExploreGroupTask { - pub fn new(group_id: GroupId, upper_bound: Option) -> Self { - Self { - group_id, - upper_bound, - } - } -} - -impl> Task for ExploreGroupTask { - fn execute(&self, optimizer: &mut CascadesOptimizer) -> Result>>> { - trace!(event = "task_begin", task = "explore_group", group_id = %self.group_id); - let mut tasks = vec![]; - if optimizer.is_group_explored(self.group_id) { - trace!(target: "task_finish", task = "explore_group", result = "already explored, skipping", group_id = %self.group_id); - return Ok(vec![]); - } - let exprs = optimizer.get_all_exprs_in_group(self.group_id); - let exprs_cnt = exprs.len(); - for expr in exprs { - let typ = optimizer.get_expr_memoed(expr).typ.clone(); - if typ.is_logical() { - tasks - .push(Box::new(OptimizeExpressionTask::new(expr, true, self.upper_bound)) as Box>); - } - } - optimizer.mark_group_explored(self.group_id); - trace!( - event = "task_finish", - task = "explore_group", - result = "expand group", - exprs_cnt = exprs_cnt - ); - Ok(tasks) - } - - fn describe(&self) -> String { - format!("explore_group {}", self.group_id) - } -} diff --git a/optd-core/src/cascades/tasks/optimize_expression.rs b/optd-core/src/cascades/tasks/optimize_expression.rs deleted file mode 100644 index d350af9b..00000000 --- a/optd-core/src/cascades/tasks/optimize_expression.rs +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2023-2024 CMU Database Group -// -// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at -// https://opensource.org/licenses/MIT. - -use anyhow::Result; -use tracing::trace; - -use super::Task; -use crate::cascades::optimizer::{CascadesOptimizer, ExprId}; -use crate::cascades::tasks::{ApplyRuleTask, ExploreGroupTask}; -use crate::cascades::Memo; -use crate::nodes::NodeType; -use crate::rules::RuleMatcher; - -pub struct OptimizeExpressionTask { - expr_id: ExprId, - exploring: bool, - upper_bound: Option, -} - -impl OptimizeExpressionTask { - pub fn new(expr_id: ExprId, exploring: bool, upper_bound: Option) -> Self { - Self { - expr_id, - exploring, - upper_bound, - } - } -} - -fn top_matches(matcher: &RuleMatcher, match_typ: T) -> bool { - match matcher { - RuleMatcher::MatchNode { typ, .. } => typ == &match_typ, - RuleMatcher::MatchDiscriminant { - typ_discriminant, .. - } => std::mem::discriminant(&match_typ) == *typ_discriminant, - _ => panic!("IR should have root node of match"), - } -} - -impl> Task for OptimizeExpressionTask { - fn execute(&self, optimizer: &mut CascadesOptimizer) -> Result>>> { - let expr = optimizer.get_expr_memoed(self.expr_id); - let group_id = optimizer.get_group_id(self.expr_id); - trace!(event = "task_begin", task = "optimize_expr", expr_id = %self.expr_id, expr = %expr); - let mut tasks = vec![]; - for (rule_id, rule) in optimizer.rules().iter().enumerate() { - if optimizer.is_rule_fired(self.expr_id, rule_id) { - continue; - } - // Skip impl rules when exploring - if self.exploring && rule.is_impl_rule() { - continue; - } - // Skip transformation rules when budget is used - if (optimizer.ctx.budget_used_logical || optimizer.ctx.budget_used_all) - && !rule.is_impl_rule() - { - continue; - } - if optimizer.ctx.budget_used_all - && optimizer.get_group_info(group_id).winner.has_full_winner() - { - break; - } - if top_matches(rule.matcher(), expr.typ.clone()) { - tasks.push(Box::new(ApplyRuleTask::new( - rule_id, - self.expr_id, - self.exploring, - self.upper_bound, - )) as Box>); - for &input_group_id in &expr.children { - tasks.push( - Box::new(ExploreGroupTask::new(input_group_id, self.upper_bound)) - as Box>, - ); - } - } - } - trace!(event = "task_end", task = "optimize_expr", expr_id = %self.expr_id); - Ok(tasks) - } - - fn describe(&self) -> String { - format!("optimize_expr {}", self.expr_id) - } -} diff --git a/optd-core/src/cascades/tasks/optimize_group.rs b/optd-core/src/cascades/tasks/optimize_group.rs deleted file mode 100644 index 58b2e877..00000000 --- a/optd-core/src/cascades/tasks/optimize_group.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2023-2024 CMU Database Group -// -// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at -// https://opensource.org/licenses/MIT. - -use anyhow::Result; -use tracing::trace; - -use super::Task; -use crate::cascades::optimizer::GroupId; -use crate::cascades::tasks::optimize_expression::OptimizeExpressionTask; -use crate::cascades::tasks::OptimizeInputsTask; -use crate::cascades::{CascadesOptimizer, Memo}; -use crate::nodes::NodeType; - -pub struct OptimizeGroupTask { - group_id: GroupId, - upper_bound: Option, -} - -impl OptimizeGroupTask { - pub fn new(group_id: GroupId, upper_bound: Option) -> Self { - Self { - group_id, - upper_bound, - } - } -} - -impl> Task for OptimizeGroupTask { - fn execute(&self, optimizer: &mut CascadesOptimizer) -> Result>>> { - trace!(event = "task_begin", task = "optimize_group", group_id = %self.group_id); - let group_info = optimizer.get_group_info(self.group_id); - if group_info.winner.has_decided() { - trace!(event = "task_finish", task = "optimize_group"); - return Ok(vec![]); - } - let exprs = optimizer.get_all_exprs_in_group(self.group_id); - let mut tasks = vec![]; - let exprs_cnt = exprs.len(); - for &expr in &exprs { - let typ = optimizer.get_expr_memoed(expr).typ.clone(); - if typ.is_logical() { - tasks.push(Box::new(OptimizeExpressionTask::new(expr, false, self.upper_bound)) as Box>); - } - } - for &expr in &exprs { - let typ = optimizer.get_expr_memoed(expr).typ.clone(); - if !typ.is_logical() { - tasks.push(Box::new(OptimizeInputsTask::new( - expr, - !optimizer.prop.disable_pruning, - self.upper_bound - )) as Box>); - } - } - trace!(event = "task_finish", task = "optimize_group", group_id = %self.group_id, exprs_cnt = exprs_cnt); - Ok(tasks) - } - - fn describe(&self) -> String { - format!("optimize_group {}", self.group_id) - } -} diff --git a/optd-core/src/cascades/tasks/optimize_inputs.rs b/optd-core/src/cascades/tasks/optimize_inputs.rs deleted file mode 100644 index 637eaa8d..00000000 --- a/optd-core/src/cascades/tasks/optimize_inputs.rs +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright (c) 2023-2024 CMU Database Group -// -// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at -// https://opensource.org/licenses/MIT. - -use anyhow::Result; -use itertools::Itertools; -use tracing::trace; - -use super::Task; -use crate::cascades::memo::{GroupInfo, Winner, WinnerInfo}; -use crate::cascades::optimizer::ExprId; -use crate::cascades::tasks::OptimizeGroupTask; -use crate::cascades::{CascadesOptimizer, Memo, RelNodeContext}; -use crate::cost::{Cost, Statistics}; -use crate::nodes::NodeType; - -#[derive(Debug, Clone)] -struct ContinueTask { - next_group_idx: usize, - return_from_optimize_group: bool, -} - -struct ContinueTaskDisplay<'a>(&'a Option); - -impl std::fmt::Display for ContinueTaskDisplay<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.0 { - Some(x) => { - if x.return_from_optimize_group { - write!(f, "return,next_group_idx={}", x.next_group_idx) - } else { - write!(f, "enter,next_group_idx={}", x.next_group_idx) - } - } - None => write!(f, "none"), - } - } -} - -pub struct OptimizeInputsTask { - expr_id: ExprId, - continue_from: Option, - pruning: bool, - upper_bound: Option, -} - -impl OptimizeInputsTask { - pub fn new(expr_id: ExprId, pruning: bool, upper_bound: Option) -> Self { - Self { - expr_id, - continue_from: None, - pruning, - upper_bound, - } - } - - fn continue_from(&self, cont: ContinueTask, pruning: bool, upper_bound: Option) -> Self { - Self { - expr_id: self.expr_id, - continue_from: Some(cont), - pruning, - upper_bound, - } - } - - fn update_winner_impossible>( - &self, - optimizer: &mut CascadesOptimizer, - ) { - let group_id = optimizer.get_group_id(self.expr_id); - if let Winner::Unknown = optimizer.get_group_info(group_id).winner { - optimizer.update_group_info( - group_id, - GroupInfo { - winner: Winner::Impossible, - }, - ); - } - } - - fn update_winner>( - &self, - input_statistics: Vec>, - operation_cost: Cost, - total_cost: Cost, - optimizer: &mut CascadesOptimizer, - ) { - let group_id = optimizer.get_group_id(self.expr_id); - let group_info = optimizer.get_group_info(group_id); - let cost = optimizer.cost(); - let operation_weighted_cost = cost.weighted_cost(&operation_cost); - let total_weighted_cost = cost.weighted_cost(&total_cost); - let mut update_cost = false; - if let Some(winner) = group_info.winner.as_full_winner() { - if winner.total_weighted_cost > total_weighted_cost { - update_cost = true; - } - } else { - update_cost = true; - } - if update_cost { - let expr = optimizer.get_expr_memoed(self.expr_id); - let preds = expr - .predicates - .iter() - .map(|pred_id| optimizer.get_pred(*pred_id)) - .collect_vec(); - let statistics = cost.derive_statistics( - &expr.typ, - &preds, - &input_statistics - .iter() - .map(|x| x.expect("child winner should always have statistics?")) - .collect::>(), - RelNodeContext { - group_id, - expr_id: self.expr_id, - children_group_ids: expr.children.clone(), - }, - optimizer, - ); - optimizer.update_group_info( - group_id, - GroupInfo { - winner: Winner::Full(WinnerInfo { - expr_id: self.expr_id, - total_weighted_cost, - operation_weighted_cost, - total_cost, - operation_cost, - statistics: statistics.into(), - }), - }, - ); - } - } -} - -impl> Task for OptimizeInputsTask { - fn execute(&self, optimizer: &mut CascadesOptimizer) -> Result>>> { - if self.continue_from.is_none() { - if optimizer.is_expr_explored(self.expr_id) { - // skip optimize_inputs to avoid dead-loop: consider join commute being fired twice - // that produces two projections, therefore having groups like - // projection1 -> projection2 -> join = projection1. - trace!(event = "task_skip", task = "optimize_inputs", expr_id = %self.expr_id); - return Ok(vec![]); - } - optimizer.mark_expr_explored(self.expr_id); - } - let expr = optimizer.get_expr_memoed(self.expr_id); - let group_id = optimizer.get_group_id(self.expr_id); - let children_group_ids = &expr.children; - let cost = optimizer.cost(); - - trace!(event = "task_begin", task = "optimize_inputs", expr_id = %self.expr_id, continue_from = %ContinueTaskDisplay(&self.continue_from), total_children = %children_group_ids.len()); - - let upper_bound = if self.pruning { - if let Some(upper_bound) = self.upper_bound { - Some(upper_bound) - } else if let Some(winner) = optimizer.get_group_info(group_id).winner.as_full_winner() - { - Some(winner.total_weighted_cost) - } else { - None - } - } else { - None - }; - - if let Some(ContinueTask { - next_group_idx, - return_from_optimize_group, - }) = self.continue_from.clone() - { - let context = RelNodeContext { - expr_id: self.expr_id, - group_id, - children_group_ids: children_group_ids.clone(), - }; - let input_statistics = children_group_ids - .iter() - .map(|&group_id| { - optimizer - .get_group_info(group_id) - .winner - .as_full_winner() - .map(|x| x.statistics.clone()) - }) - .collect::>(); - let input_statistics_ref = input_statistics - .iter() - .map(|x| x.as_deref()) - .collect::>(); - let input_cost = children_group_ids - .iter() - .map(|&group_id| { - optimizer - .get_group_info(group_id) - .winner - .as_full_winner() - .map(|x| x.total_cost.clone()) - .unwrap_or_else(|| cost.zero()) - }) - .collect::>(); - let preds = expr - .predicates - .iter() - .map(|pred_id| optimizer.get_pred(*pred_id)) - .collect_vec(); - let operation_cost = cost.compute_operation_cost( - &expr.typ, - &preds, - &input_statistics_ref, - context.clone(), - optimizer, - ); - let total_cost = cost.sum(&operation_cost, &input_cost); - - if self.pruning { - let group_info = optimizer.get_group_info(group_id); - fn trace_fmt(winner: &Winner) -> String { - match winner { - Winner::Full(winner) => winner.total_weighted_cost.to_string(), - Winner::Impossible => "impossible".to_string(), - Winner::Unknown => "unknown".to_string(), - } - } - trace!( - event = "compute_cost", - task = "optimize_inputs", - expr_id = %self.expr_id, - weighted_cost_so_far = cost.weighted_cost(&total_cost), - winner_weighted_cost = %trace_fmt(&group_info.winner), - current_processing = %next_group_idx, - total_child_groups = %children_group_ids.len()); - if let Some(upper_bound) = upper_bound { - let cost_so_far = cost.weighted_cost(&total_cost); - if upper_bound <= cost_so_far { - trace!(event = "task_finish", task = "optimize_inputs", expr_id = %self.expr_id, result = "pruned"); - return Ok(vec![]); - } - } - } - - if next_group_idx < children_group_ids.len() { - let child_group_id = children_group_ids[next_group_idx]; - let group_idx = next_group_idx; - let child_group_info = optimizer.get_group_info(child_group_id); - let Some(child_winner) = child_group_info.winner.as_full_winner() else { - if !return_from_optimize_group { - trace!(event = "task_yield", task = "optimize_inputs", expr_id = %self.expr_id, group_idx = %group_idx, yield_to = "optimize_group", optimize_group_id = %child_group_id); - return Ok(vec![ - Box::new(self.continue_from( - ContinueTask { - next_group_idx, - return_from_optimize_group: true, - }, - self.pruning, - upper_bound, - )) as Box>, - Box::new(OptimizeGroupTask::new(child_group_id, upper_bound)) - as Box>, - ]); - } else { - self.update_winner_impossible(optimizer); - trace!(event = "task_finish", task = "optimize_inputs", expr_id = %self.expr_id, result = "impossible"); - return Ok(vec![]); - } - }; - trace!(event = "task_yield", task = "optimize_inputs", expr_id = %self.expr_id, group_idx = %group_idx, yield_to = "next_optimize_input"); - Ok(vec![Box::new(self.continue_from( - ContinueTask { - next_group_idx: group_idx + 1, - return_from_optimize_group: false, - }, - self.pruning, - upper_bound.map(|bound| bound - child_winner.total_weighted_cost), - )) as Box>]) - } else { - self.update_winner(input_statistics_ref, operation_cost, total_cost, optimizer); - trace!(event = "task_finish", task = "optimize_inputs", expr_id = %self.expr_id, result = "optimized"); - Ok(vec![]) - } - } else { - trace!(event = "task_yield", task = "optimize_inputs", expr_id = %self.expr_id); - Ok(vec![Box::new(self.continue_from( - ContinueTask { - next_group_idx: 0, - return_from_optimize_group: false, - }, - self.pruning, - upper_bound, - )) as Box>]) - } - } - - fn describe(&self) -> String { - format!("optimize_inputs {}", self.expr_id) - } -} diff --git a/optd-core/src/cascades/tasks2.rs b/optd-core/src/cascades/tasks2.rs new file mode 100644 index 00000000..5644ab7f --- /dev/null +++ b/optd-core/src/cascades/tasks2.rs @@ -0,0 +1,550 @@ +use std::sync::Arc; + +use itertools::Itertools; +use tracing::trace; + +use super::memo::MemoPlanNode; +use super::rule_match::match_and_pick_expr; +use super::{optimizer::RuleId, CascadesOptimizer, ExprId, GroupId, Memo}; +use crate::cascades::{ + memo::{Winner, WinnerInfo}, + RelNodeContext, +}; +use crate::cost::{Cost, Statistics}; +use crate::nodes::ArcPredNode; +use crate::{nodes::NodeType, rules::RuleMatcher}; + +struct SearchContext { + group_id: GroupId, + upper_bound: Option, +} + +pub struct TaskContext<'a, T: NodeType, M: Memo> { + optimizer: &'a mut CascadesOptimizer, + steps: usize, +} + +/// Ensures we don't run into cycles / dead loops. +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum TaskDesc { + OptimizeExpr(ExprId, GroupId), + OptimizeInput(ExprId, GroupId), +} + +impl<'a, T: NodeType, M: Memo> TaskContext<'a, T, M> { + pub fn new(optimizer: &'a mut CascadesOptimizer) -> Self { + Self { + optimizer, + steps: 0, + } + } + + pub async fn fire_optimize(&mut self, group_id: GroupId) { + self.optimize_group(SearchContext { + group_id, + upper_bound: None, + }) + .await; + } + + async fn optimize_group(&mut self, ctx: SearchContext) { + Box::pin(self.optimize_group_inner(ctx)).await; + } + + async fn optimize_expr(&mut self, ctx: SearchContext, expr_id: ExprId, exploring: bool) { + Box::pin(self.optimize_expr_inner(ctx, expr_id, exploring)).await; + } + + async fn explore_group(&mut self, ctx: SearchContext) { + Box::pin(self.explore_group_inner(ctx)).await; + } + + async fn apply_rule( + &mut self, + ctx: SearchContext, + rule_id: RuleId, + expr_id: ExprId, + exploring: bool, + ) { + Box::pin(self.apply_rule_inner(ctx, rule_id, expr_id, exploring)).await; + } + + async fn optimize_input(&mut self, ctx: SearchContext, expr_id: ExprId) { + Box::pin(self.optimize_input_inner(ctx, expr_id)).await; + } + + async fn optimize_group_inner(&mut self, ctx: SearchContext) { + self.steps += 1; + self.optimizer.stats.optimize_group_count += 1; + self.on_task_start(); + let SearchContext { group_id, .. } = ctx; + trace!(event = "task_begin", task = "optimize_group", group_id = %group_id); + + if self.optimizer.is_group_explored(group_id) { + trace!( + event = "task_finish", + task = "optimize_group", + group_id = %group_id, + outcome = "already explored, skipping", + ); + return; + } + self.optimizer.mark_group_explored(group_id); + + // The Columbia optimizer will stop if we have a full winner, but given that we implement + // 2-stage optimization, we will continue to optimize the group even if we have a full winner. + + let exprs = self.optimizer.get_all_exprs_in_group(group_id); + // First, optimize all physical expressions + for &expr_id in &exprs { + let expr = self.optimizer.get_expr_memoed(expr_id); + if !expr.typ.is_logical() { + self.optimize_input( + SearchContext { + group_id, + upper_bound: ctx.upper_bound, + }, + expr_id, + ) + .await; + } + } + // Then, optimize all logical expressions + for &expr_id in &exprs { + let typ = self.optimizer.get_expr_memoed(expr_id).typ.clone(); + if typ.is_logical() { + self.optimize_expr( + SearchContext { + group_id, + upper_bound: ctx.upper_bound, + }, + expr_id, + false, + ) + .await + } + } + trace!(event = "task_finish", task = "optimize_group", group_id = %group_id); + } + + async fn optimize_expr_inner(&mut self, ctx: SearchContext, expr_id: ExprId, exploring: bool) { + self.steps += 1; + self.optimizer.stats.optimize_expr_count += 1; + self.on_task_start(); + let SearchContext { group_id, .. } = ctx; + let desc = TaskDesc::OptimizeExpr(expr_id, group_id); + if self.optimizer.has_task_started(&desc) { + trace!(event = "task_skip", task = "optimize_expr", expr_id = %expr_id); + return; + } + self.optimizer.mark_task_start(&desc); + + fn top_matches(matcher: &RuleMatcher, match_typ: T) -> bool { + match matcher { + RuleMatcher::MatchNode { typ, .. } => typ == &match_typ, + RuleMatcher::MatchDiscriminant { + typ_discriminant, .. + } => std::mem::discriminant(&match_typ) == *typ_discriminant, + _ => panic!("IR should have root node of match"), + } + } + let expr = self.optimizer.get_expr_memoed(expr_id); + assert!(expr.typ.is_logical()); + trace!(event = "task_begin", task = "optimize_expr", expr_id = %expr_id, expr = %expr); + for (rule_id, rule) in self.optimizer.rules().iter().enumerate() { + if self.optimizer.is_rule_fired(expr_id, rule_id) { + continue; + } + // Skip impl rules when exploring + if exploring && rule.is_impl_rule() { + continue; + } + // Skip transformation rules when budget is used + if self.optimizer.ctx.logical_budget_used && !rule.is_impl_rule() { + continue; + } + if self.optimizer.ctx.all_budget_used { + break; + } + if top_matches(rule.matcher(), expr.typ.clone()) { + for &input_group_id in &expr.children { + self.explore_group(SearchContext { + group_id: input_group_id, + upper_bound: ctx.upper_bound, + }) + .await; + } + self.apply_rule( + SearchContext { + group_id, + upper_bound: ctx.upper_bound, + }, + rule_id, + expr_id, + exploring, + ) + .await; + } + } + self.optimizer.mark_task_end(&desc); + trace!(event = "task_end", task = "optimize_expr", expr_id = %expr_id, expr = %expr); + } + + async fn explore_group_inner(&mut self, ctx: SearchContext) { + self.steps += 1; + self.optimizer.stats.explore_group_count += 1; + self.on_task_start(); + let SearchContext { group_id, .. } = ctx; + trace!(event = "task_begin", task = "explore_group", group_id = %group_id); + let exprs = self.optimizer.get_all_exprs_in_group(group_id); + for expr in exprs { + let typ = self.optimizer.get_expr_memoed(expr).typ.clone(); + if typ.is_logical() { + self.optimize_expr( + SearchContext { + group_id, + upper_bound: ctx.upper_bound, + }, + expr, + true, + ) + .await; + } + } + trace!( + event = "task_finish", + task = "explore_group", + group_id = %group_id, + outcome = "expanded group" + ); + } + + async fn apply_rule_inner( + &mut self, + ctx: SearchContext, + rule_id: RuleId, + expr_id: ExprId, + exploring: bool, + ) { + self.steps += 1; + self.optimizer.stats.apply_rule_count += 1; + self.on_task_start(); + let SearchContext { group_id, .. } = ctx; + trace!(event = "task_begin", task = "apply_rule", expr_id = %expr_id, exploring = %exploring); + if self.optimizer.is_rule_fired(expr_id, rule_id) { + trace!(event = "task_end", task = "apply_rule", expr_id = %expr_id, exploring = %exploring, outcome = "rule already fired"); + return; + } + + if self.optimizer.is_rule_disabled(rule_id) { + trace!(event = "task_end", task = "apply_rule", expr_id = %expr_id, exploring = %exploring, outcome = "rule disabled"); + return; + } + + self.optimizer.mark_rule_fired(expr_id, rule_id); + + let rule = self.optimizer.rules()[rule_id].clone(); + + let binding_exprs = match_and_pick_expr(rule.matcher(), expr_id, self.optimizer); + if binding_exprs.len() >= 100 { + tracing::warn!( + event = "rule_application", + task = "apply_rule", + expr_id = %expr_id, + rule_id = %rule_id, + outcome = "too_many_bindings", + num_bindings = %binding_exprs.len() + ); + } + if !binding_exprs.is_empty() { + *self + .optimizer + .stats + .rule_match_count + .entry(rule_id) + .or_default() += 1; + } + for binding in binding_exprs { + *self + .optimizer + .stats + .rule_total_bindings + .entry(rule_id) + .or_default() += 1; + if !self.optimizer.ctx.logical_budget_used { + let plan_space = self.optimizer.memo().estimated_plan_space(); + if let Some(partial_explore_space) = self.optimizer.prop.partial_explore_space { + if plan_space > partial_explore_space { + tracing::warn!( + "plan space size budget used, not applying logical rules any more. current plan space: {}", + plan_space + ); + self.optimizer.ctx.logical_budget_used = true; + if self.optimizer.prop.panic_on_budget { + panic!("plan space size budget used"); + } + } + } + } + if !self.optimizer.ctx.all_budget_used { + let step = self.steps; + if let Some(partial_explore_iter) = self.optimizer.prop.partial_explore_iter { + if step > partial_explore_iter { + tracing::warn!( + "iter budget used, not applying any rules any more. current iter: {}", + step + ); + self.optimizer.ctx.all_budget_used = true; + if self.optimizer.prop.panic_on_budget { + panic!("plan space size budget used"); + } + } + } + } + + if self.optimizer.ctx.all_budget_used { + break; + } + if self.optimizer.ctx.logical_budget_used && !rule.is_impl_rule() { + continue; + } + + trace!(event = "before_apply_rule", task = "apply_rule", input_binding=%binding); + let applied = rule.apply(self.optimizer, binding); + for expr in applied { + trace!(event = "after_apply_rule", task = "apply_rule", output_binding=%expr); + // TODO: remove clone in the below line + if let Some(expr_id) = self.optimizer.add_expr_to_group(expr.clone(), group_id) { + let typ = expr.unwrap_typ(); + if typ.is_logical() { + self.optimize_expr( + SearchContext { + group_id, + upper_bound: ctx.upper_bound, + }, + expr_id, + exploring, + ) + .await; + } else { + self.optimize_input( + SearchContext { + group_id, + upper_bound: ctx.upper_bound, + }, + expr_id, + ) + .await; + } + trace!(event = "apply_rule", expr_id = %expr_id, rule_id = %rule_id, new_expr_id = %expr_id); + } else { + trace!(event = "apply_rule", expr_id = %expr_id, rule_id = %rule_id, "triggered group merge"); + } + } + } + trace!(event = "task_end", task = "apply_rule", expr_id = %expr_id, rule_id = %rule_id); + } + + fn update_winner_if_better(&mut self, group_id: GroupId, proposed_winner: WinnerInfo) { + let mut update_cost = false; + let current_winner = self.optimizer.get_group_winner(group_id); + if let Some(winner) = current_winner.as_full_winner() { + if winner.total_weighted_cost > proposed_winner.total_weighted_cost { + update_cost = true; + } + } else { + update_cost = true; + } + if update_cost { + tracing::trace!( + event = "update_winner", + task = "optimize_inputs", + expr_id = ?proposed_winner.expr_id, + total_weighted_cost = %proposed_winner.total_weighted_cost, + operation_weighted_cost = %proposed_winner.operation_weighted_cost, + ); + self.optimizer + .update_group_winner(group_id, Winner::Full(proposed_winner)); + } + } + + #[allow(clippy::type_complexity)] + fn gather_statistics_and_costs( + &mut self, + group_id: GroupId, + expr_id: ExprId, + expr: &MemoPlanNode, + predicates: &[ArcPredNode], + ) -> (Vec>>, Vec, Cost, Cost) { + let context = RelNodeContext { + expr_id, + group_id, + children_group_ids: expr.children.clone(), + }; + let mut input_stats = Vec::with_capacity(expr.children.len()); + let mut input_cost = Vec::with_capacity(expr.children.len()); + let cost = self.optimizer.cost(); + #[allow(clippy::needless_range_loop)] + for idx in 0..expr.children.len() { + let winner = self + .optimizer + .get_group_winner(expr.children[idx]) + .as_full_winner(); + let stats = winner.map(|x| x.statistics.clone()); + input_stats.push(stats.clone()); + input_cost.push( + winner + .map(|x| x.total_cost.clone()) + .unwrap_or_else(|| cost.zero()), + ); + } + let input_stats_ref = input_stats + .iter() + .map(|x| x.as_ref().map(|y| y.as_ref())) + .collect_vec(); + let operation_cost = cost.compute_operation_cost( + &expr.typ, + predicates, + &input_stats_ref, + context.clone(), + self.optimizer, + ); + let total_cost = cost.sum(&operation_cost, &input_cost); + (input_stats, input_cost, total_cost, operation_cost) + } + + async fn optimize_input_inner(&mut self, ctx: SearchContext, expr_id: ExprId) { + self.steps += 1; + self.optimizer.stats.optimize_input_count += 1; + self.on_task_start(); + let SearchContext { group_id, .. } = ctx; + let desc = TaskDesc::OptimizeInput(expr_id, group_id); + if self.optimizer.has_task_started(&desc) { + trace!(event = "task_skip", task = "optimize_input", expr_id = %expr_id); + return; + } + self.optimizer.mark_task_start(&desc); + + trace!(event = "task_begin", task = "optimize_inputs", expr_id = %expr_id); + + // TODO: assert this plan node satisfies subgoal + + let expr = self.optimizer.get_expr_memoed(expr_id); + let cost = self.optimizer.cost(); + + let predicates = expr + .predicates + .iter() + .map(|pred_id| self.optimizer.get_pred(*pred_id)) + .collect_vec(); + + // The upper bound of the search is the minimum of cost of the current best plan AND the + // upper bound of the context. + let winner_upper_bound = self + .optimizer + .memo() + .get_group_winner(group_id) + .as_full_winner() + .map(|winner| winner.total_weighted_cost); + + let upper_bound = match (ctx.upper_bound, winner_upper_bound) { + (Some(ub), Some(wub)) => Some(ub.min(wub)), + (Some(ub), None) => Some(ub), + (None, Some(wub)) => Some(wub), + (None, None) => None, + }; + + for (input_group_idx, _) in expr.children.iter().enumerate() { + // Before optimizing each of the child, infer a current lower bound cost + let (_, input_costs, total_cost, _) = + self.gather_statistics_and_costs(group_id, expr_id, &expr, &predicates); + + let child_upper_bound = if !self.optimizer.prop.disable_pruning { + let cost_so_far = cost.weighted_cost(&total_cost); + let child_current_cost = input_costs[input_group_idx].clone(); + // TODO: also adds up lower-bound cost + trace!( + event = "compute_cost", + task = "optimize_inputs", + expr_id = %expr_id, + weighted_cost_so_far = cost_so_far, + upper_bound = ?upper_bound, + current_processing = %input_group_idx, + total_child_groups = %expr.children.len()); + if let Some(upper_bound) = upper_bound { + if upper_bound < cost_so_far { + // allow strictly == because we want to replan one of the child + trace!(event = "task_finish", task = "optimize_inputs", expr_id = %expr_id, result = "pruned"); + self.optimizer.mark_task_end(&desc); + return; + } + Some(upper_bound - cost_so_far + cost.weighted_cost(&child_current_cost)) + } else { + None + } + } else { + None + }; + + let child_group_id = expr.children[input_group_idx]; + // always optimize group even if there's a winner b/c we want to replan (versus if there's a full winner then exit) + self.optimize_group(SearchContext { + group_id: child_group_id, + upper_bound: child_upper_bound, + }) + .await; + let child_group_winner = self.optimizer.get_group_winner(child_group_id); + if !child_group_winner.has_full_winner() { + if let Winner::Unknown = self.optimizer.get_group_winner(child_group_id) { + self.optimizer.mark_task_end(&desc); + trace!(event = "task_finish", task = "optimize_inputs", expr_id = %expr_id, result = "impossible"); + return; + } + } + } + + // Compute everything again + let (input_stats, _, total_cost, operation_cost) = + self.gather_statistics_and_costs(group_id, expr_id, &expr, &predicates); + let input_stats_ref = input_stats + .iter() + .map(|x| { + x.as_ref() + .expect("stats should be available for full winners") + .as_ref() + }) + .collect_vec(); + let statistics = Arc::new(cost.derive_statistics( + &expr.typ, + &predicates, + &input_stats_ref, + RelNodeContext { + expr_id, + group_id, + children_group_ids: expr.children.clone(), + }, + self.optimizer, + )); + let proposed_winner = WinnerInfo { + expr_id, + total_cost: total_cost.clone(), + operation_cost: operation_cost.clone(), + total_weighted_cost: cost.weighted_cost(&total_cost), + operation_weighted_cost: cost.weighted_cost(&operation_cost), + statistics, + }; + self.update_winner_if_better(group_id, proposed_winner); + trace!(event = "task_finish", task = "optimize_inputs", expr_id = %expr_id, result = "resolved"); + self.optimizer.mark_task_end(&desc); + } + + fn on_task_start(&self) { + if (self.optimizer.ctx.all_budget_used || self.optimizer.ctx.logical_budget_used) + && self.steps % 100 == 0 + { + println!("out of budget, dumping info"); + println!("step={}", self.steps); + self.optimizer.dump_stats(); + } + } +} diff --git a/optd-datafusion-bridge/src/lib.rs b/optd-datafusion-bridge/src/lib.rs index 0a324760..f4a05474 100644 --- a/optd-datafusion-bridge/src/lib.rs +++ b/optd-datafusion-bridge/src/lib.rs @@ -190,6 +190,7 @@ impl OptdQueryPlanner { if verbose { Some(&meta) } else { None }, ), )); + tracing::debug!("generating optd-join-order"); let join_orders = optimizer .optd_cascades_optimizer() .memo() diff --git a/optd-datafusion-repr-adv-cost/Cargo.toml b/optd-datafusion-repr-adv-cost/Cargo.toml index db342fbe..d71b355f 100644 --- a/optd-datafusion-repr-adv-cost/Cargo.toml +++ b/optd-datafusion-repr-adv-cost/Cargo.toml @@ -21,6 +21,5 @@ rayon = "1.10" itertools = "0.13" test-case = "3.3" tracing = "0.1" -tracing-subscriber = "0.3" optd-gungnir = { path = "../optd-gungnir", version = "0.1" } serde_with = { version = "3.7.0", features = ["json"] } diff --git a/optd-datafusion-repr/Cargo.toml b/optd-datafusion-repr/Cargo.toml index 3fed11de..8511e097 100644 --- a/optd-datafusion-repr/Cargo.toml +++ b/optd-datafusion-repr/Cargo.toml @@ -14,7 +14,6 @@ repository = { workspace = true } anyhow = "1" arrow-schema = "53.3.0" tracing = "0.1" -tracing-subscriber = "0.3" pretty-xmlish = "0.1" itertools = "0.13" optd-core = { path = "../optd-core", version = "0.1" } diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 1bef36ba..d7d17eec 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -20,7 +20,7 @@ pub use optd_core::nodes::Value; use optd_core::optimizer::Optimizer; use optd_core::rules::Rule; pub use optimizer_ext::OptimizerExt; -use plan_nodes::{ArcDfPlanNode, DfNodeType}; +use plan_nodes::{ArcDfPlanNode, DfNodeType, DfReprPlanNode}; use properties::column_ref::ColumnRefPropertyBuilder; use properties::schema::{Catalog, SchemaPropertyBuilder}; @@ -139,17 +139,20 @@ impl DatafusionOptimizer { ]); Self { runtime_statistics: runtime_map, - cascades_optimizer: CascadesOptimizer::new_with_prop( + cascades_optimizer: CascadesOptimizer::new_with_options( cascades_rules, Box::new(cost_model), vec![ - Box::new(SchemaPropertyBuilder::new(catalog.clone())), - Box::new(ColumnRefPropertyBuilder::new(catalog.clone())), - ], + Box::new(SchemaPropertyBuilder::new(catalog.clone())) + as Box>, + Box::new(ColumnRefPropertyBuilder::new(catalog.clone())) + as Box>, + ] + .into(), OptimizerProperties { panic_on_budget: false, - partial_explore_iter: Some(1 << 20), - partial_explore_space: None, // remove this in the future + partial_explore_iter: Some(1 << 18), + partial_explore_space: Some(1 << 14), disable_pruning: false, }, ), @@ -186,9 +189,12 @@ impl DatafusionOptimizer { rule_wrappers, Box::new(cost_model), vec![ - Box::new(SchemaPropertyBuilder::new(catalog.clone())), - Box::new(ColumnRefPropertyBuilder::new(catalog)), - ], + Box::new(SchemaPropertyBuilder::new(catalog.clone())) + as Box>, + Box::new(ColumnRefPropertyBuilder::new(catalog.clone())) + as Box>, + ] + .into(), ); Self { runtime_statistics, @@ -224,13 +230,40 @@ impl DatafusionOptimizer { self.cascades_optimizer.step_clear(); } - let group_id = self.cascades_optimizer.step_optimize_rel(root_rel)?; + tracing::debug!("before_cascades={}", root_rel.explain_to_string(None)); + + self.cascades_optimizer + .disable_rule_by_name("join_commute_rule"); + self.cascades_optimizer + .disable_rule_by_name("join_assoc_rule"); + let group_id = self + .cascades_optimizer + .step_optimize_rel(root_rel.clone())?; + + tracing::debug!( + "stage_1_best_plan={}", + self.cascades_optimizer + .step_get_optimize_rel(group_id, &mut None)? + .explain_to_string(None) + ); + + self.cascades_optimizer + .enable_rule_by_name("join_commute_rule"); + self.cascades_optimizer + .enable_rule_by_name("join_assoc_rule"); + self.cascades_optimizer.step_next_stage(); + self.cascades_optimizer.fire_optimize_tasks(group_id)?; let mut meta = Some(HashMap::new()); let optimized_rel = self .cascades_optimizer .step_get_optimize_rel(group_id, &mut meta)?; + tracing::debug!( + "stage_2_best_plan={}", + optimized_rel.explain_to_string(None) + ); + Ok((group_id, optimized_rel, meta.unwrap())) } } diff --git a/optd-datafusion-repr/src/memo_ext.rs b/optd-datafusion-repr/src/memo_ext.rs index 3c0f7bb5..cb8bd0c5 100644 --- a/optd-datafusion-repr/src/memo_ext.rs +++ b/optd-datafusion-repr/src/memo_ext.rs @@ -38,7 +38,8 @@ pub trait MemoExt { fn enumerate_join_order_expr_inner + ?Sized>( memo: &M, current: ExprId, - visited: &mut HashMap>, + visited: &mut HashMap>, + warning_fired: &mut bool, ) -> Vec { let expr = memo.get_expr_memoed(current); match &expr.typ { @@ -54,10 +55,12 @@ fn enumerate_join_order_expr_inner + ?Sized>( // Assume child 0 == left, child 1 == right let left = expr.children[0]; let right = expr.children[1]; - let left_join_orders = enumerate_join_order_group_inner(memo, left, visited); - let right_join_orders = enumerate_join_order_group_inner(memo, right, visited); + let left_join_orders = + enumerate_join_order_group_inner(memo, left, visited, warning_fired); + let right_join_orders = + enumerate_join_order_group_inner(memo, right, visited, warning_fired); let mut join_orders = BTreeSet::new(); - for left_join_order in left_join_orders { + for left_join_order in left_join_orders.iter() { for right_join_order in right_join_orders.iter() { join_orders.insert(LogicalJoinOrder::Join( Box::new(left_join_order.clone()), @@ -68,12 +71,22 @@ fn enumerate_join_order_expr_inner + ?Sized>( join_orders.into_iter().collect() } typ if typ.is_logical() => { + const MAX_JOIN_ORDER_OUTPUT: usize = 20; let mut join_orders = BTreeSet::new(); - for (idx, child) in expr.children.iter().enumerate() { - let child_join_orders = enumerate_join_order_group_inner(memo, *child, visited); + 'outer: for (idx, child) in expr.children.iter().enumerate() { + let child_join_orders = + enumerate_join_order_group_inner(memo, *child, visited, warning_fired); if idx == 0 { - for child_join_order in child_join_orders { - join_orders.insert(child_join_order); + for child_join_order in child_join_orders.iter() { + join_orders.insert(child_join_order.clone()); + if join_orders.len() > MAX_JOIN_ORDER_OUTPUT && !*warning_fired { + *warning_fired = true; + tracing::warn!( + "too many join orders, returning the first {} items, TODO: only enumerate join orders when requested", + MAX_JOIN_ORDER_OUTPUT + ); + break 'outer; + } } } else { assert!( @@ -82,7 +95,13 @@ fn enumerate_join_order_expr_inner + ?Sized>( ); } } - join_orders.into_iter().collect() + + join_orders + .iter() + .take(MAX_JOIN_ORDER_OUTPUT) + .map(|x| (*x).clone()) + .collect_vec() + .into() } _ => Vec::new(), } @@ -91,24 +110,26 @@ fn enumerate_join_order_expr_inner + ?Sized>( fn enumerate_join_order_group_inner + ?Sized>( memo: &M, current: GroupId, - visited: &mut HashMap>, -) -> Vec { + visited: &mut HashMap>, + warning_fired: &mut bool, +) -> Arc<[LogicalJoinOrder]> { if let Some(result) = visited.get(¤t) { return result.clone(); } // If the current node is processed again before the result gets populated, simply return an // empty list, as another search path will eventually return a correct for it, and then get // combined with this empty list. - visited.insert(current, Vec::new()); + visited.insert(current, Arc::new([])); let group_exprs = memo.get_all_exprs_in_group(current); let mut join_orders = BTreeSet::new(); for expr_id in group_exprs { - let expr_join_orders = enumerate_join_order_expr_inner(memo, expr_id, visited); + let expr_join_orders = + enumerate_join_order_expr_inner(memo, expr_id, visited, warning_fired); for expr_join_order in expr_join_orders { join_orders.insert(expr_join_order); } } - let res = join_orders.into_iter().collect_vec(); + let res: Arc<[_]> = join_orders.into_iter().collect_vec().into(); visited.insert(current, res.clone()); res } @@ -116,7 +137,10 @@ fn enumerate_join_order_group_inner + ?Sized>( impl> MemoExt for M { fn enumerate_join_order(&self, entry: GroupId) -> Vec { let mut visited = HashMap::new(); - enumerate_join_order_group_inner(self, entry, &mut visited) + enumerate_join_order_group_inner(self, entry, &mut visited, &mut false) + .iter() + .map(|x| x.clone()) + .collect() } } diff --git a/optd-sqlplannertest/Cargo.toml b/optd-sqlplannertest/Cargo.toml index 7ea9e0e4..66f7d130 100644 --- a/optd-sqlplannertest/Cargo.toml +++ b/optd-sqlplannertest/Cargo.toml @@ -25,7 +25,6 @@ datafusion = { version = "43.0.0", features = [ "unicode_expressions", "compression", ] } -env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } regex = "1.8" tokio = { version = "1.24", features = [ @@ -39,6 +38,8 @@ optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } itertools = "0.13" lazy_static = "1.4.0" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +backtrace-on-stack-overflow = "0.3" [dev-dependencies] criterion = { version = "0.5.1", features = ["async_tokio"] } diff --git a/optd-sqlplannertest/src/bin/planner_test_apply.rs b/optd-sqlplannertest/src/bin/planner_test_apply.rs index 6afca93c..102e3a42 100644 --- a/optd-sqlplannertest/src/bin/planner_test_apply.rs +++ b/optd-sqlplannertest/src/bin/planner_test_apply.rs @@ -24,7 +24,18 @@ struct Cli { #[tokio::main] async fn main() -> Result<()> { - env_logger::init(); + use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter}; + + tracing_subscriber::registry() + .with(fmt::layer()) + .with( + EnvFilter::builder() + .with_default_directive(LevelFilter::INFO.into()) + .from_env_lossy(), + ) + .init(); + + unsafe { backtrace_on_stack_overflow::enable() }; let cli = Cli::parse(); diff --git a/optd-sqlplannertest/tests/basic/cross_product.planner.sql b/optd-sqlplannertest/tests/basic/cross_product.planner.sql index 0799cc5a..0c61dbf4 100644 --- a/optd-sqlplannertest/tests/basic/cross_product.planner.sql +++ b/optd-sqlplannertest/tests/basic/cross_product.planner.sql @@ -14,10 +14,10 @@ select * from t1, t2; /* LogicalProjection { exprs: [ #0, #1 ] } -└── LogicalJoin { join_type: Cross, cond: true } +└── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } -PhysicalNestedLoopJoin { join_type: Cross, cond: true } +PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: t1 } └── PhysicalScan { table: t2 } 0 0 diff --git a/optd-sqlplannertest/tests/basic/filter.planner.sql b/optd-sqlplannertest/tests/basic/filter.planner.sql index f3ad2f1c..b3e15052 100644 --- a/optd-sqlplannertest/tests/basic/filter.planner.sql +++ b/optd-sqlplannertest/tests/basic/filter.planner.sql @@ -43,7 +43,7 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] } │ │ ├── #0 │ │ └── #2 │ └── false - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } PhysicalEmptyRelation { produce_one_row: false } @@ -63,7 +63,7 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] } │ │ ├── #0 │ │ └── #3 │ └── true - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #0 ], right_keys: [ #0, #1 ] } @@ -86,7 +86,7 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] } │ │ ├── #0 │ │ └── #3 │ └── true - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } PhysicalFilter @@ -97,7 +97,7 @@ PhysicalFilter │ └── Eq │ ├── #0 │ └── #3 -└── PhysicalNestedLoopJoin { join_type: Cross, cond: true } +└── PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: t1 } └── PhysicalScan { table: t2 } 0 0 0 200 @@ -119,10 +119,10 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] } │ │ ├── #0 │ │ └── #3 │ └── true - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } -PhysicalNestedLoopJoin { join_type: Cross, cond: true } +PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: t1 } └── PhysicalScan { table: t2 } 0 0 0 200 @@ -153,7 +153,7 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] } │ └── Eq │ ├── #0 │ └── #2 - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } @@ -254,7 +254,7 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] } │ └── true ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } -PhysicalNestedLoopJoin { join_type: Cross, cond: true } +PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: t1 } └── PhysicalScan { table: t2 } 0 0 0 200 diff --git a/optd-sqlplannertest/tests/joins/join_enumerate.planner.sql b/optd-sqlplannertest/tests/joins/join_enumerate.planner.sql index 8aa8a622..5cdad09a 100644 --- a/optd-sqlplannertest/tests/joins/join_enumerate.planner.sql +++ b/optd-sqlplannertest/tests/joins/join_enumerate.planner.sql @@ -31,6 +31,8 @@ select * from t2, t1 where t1v1 = t2v1; select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2; /* +(Join t1 (Join t2 t3)) +(Join t1 (Join t3 t2)) (Join t2 (Join t1 t3)) (Join t2 (Join t3 t1)) (Join t3 (Join t1 t2)) @@ -38,8 +40,12 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2; (Join (Join t1 t2) t3) (Join (Join t1 t3) t2) (Join (Join t2 t1) t3) +(Join (Join t2 t3) t1) (Join (Join t3 t1) t2) +(Join (Join t3 t2) t1) +(Join t1 (Join t2 t3)) +(Join t1 (Join t3 t2)) (Join t2 (Join t1 t3)) (Join t2 (Join t3 t1)) (Join t3 (Join t1 t2)) @@ -47,7 +53,9 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2; (Join (Join t1 t2) t3) (Join (Join t1 t3) t2) (Join (Join t2 t1) t3) +(Join (Join t2 t3) t1) (Join (Join t3 t1) t2) +(Join (Join t3 t2) t1) 0 200 0 0 0 300 1 201 1 1 1 301 @@ -58,6 +66,8 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2; select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2; /* +(Join t1 (Join t2 t3)) +(Join t1 (Join t3 t2)) (Join t2 (Join t1 t3)) (Join t2 (Join t3 t1)) (Join t3 (Join t1 t2)) @@ -65,8 +75,12 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2; (Join (Join t1 t2) t3) (Join (Join t1 t3) t2) (Join (Join t2 t1) t3) +(Join (Join t2 t3) t1) (Join (Join t3 t1) t2) +(Join (Join t3 t2) t1) +(Join t1 (Join t2 t3)) +(Join t1 (Join t3 t2)) (Join t2 (Join t1 t3)) (Join t2 (Join t3 t1)) (Join t3 (Join t1 t2)) @@ -74,7 +88,9 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2; (Join (Join t1 t2) t3) (Join (Join t1 t3) t2) (Join (Join t2 t1) t3) +(Join (Join t2 t3) t1) (Join (Join t3 t1) t2) +(Join (Join t3 t2) t1) 0 200 0 0 0 300 1 201 1 1 1 301 @@ -86,31 +102,41 @@ select * from t1, (select * from t2, t3) where t1v1 = t2v1 and t1v2 = t3v2; /* (Join t1 (Join t2 t3)) +(Join t1 (Join t3 t2)) +(Join t2 (Join t1 t3)) +(Join t2 (Join t3 t1)) +(Join t3 (Join t1 t2)) +(Join t3 (Join t2 t1)) +(Join (Join t1 t2) t3) +(Join (Join t1 t3) t2) +(Join (Join t2 t1) t3) (Join (Join t2 t3) t1) +(Join (Join t3 t1) t2) +(Join (Join t3 t2) t1) +PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } +├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } +│ ├── PhysicalScan { table: t1 } +│ └── PhysicalScan { table: t2 } +└── PhysicalScan { table: t3 } (Join t1 (Join t2 t3)) +(Join t1 (Join t3 t2)) +(Join t2 (Join t1 t3)) +(Join t2 (Join t3 t1)) +(Join t3 (Join t1 t2)) +(Join t3 (Join t2 t1)) +(Join (Join t1 t2) t3) +(Join (Join t1 t3) t2) +(Join (Join t2 t1) t3) (Join (Join t2 t3) t1) +(Join (Join t3 t1) t2) +(Join (Join t3 t2) t1) -LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5 ] } -└── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #2 - │ └── Eq - │ ├── #1 - │ └── #4 - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalScan { table: t1 } - └── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalScan { table: t2 } - └── LogicalScan { table: t3 } -PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #1 ], right_keys: [ #0, #2 ] } -├── PhysicalScan { table: t1 } -└── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalScan { table: t2 } - └── PhysicalScan { table: t3 } +PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } +├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } +│ ├── PhysicalScan { table: t1 } +│ └── PhysicalScan { table: t2 } +└── PhysicalScan { table: t3 } 0 0 0 200 0 300 1 1 1 201 1 301 2 2 2 202 2 302 diff --git a/optd-sqlplannertest/tests/joins/join_enumerate.yml b/optd-sqlplannertest/tests/joins/join_enumerate.yml index 9fe19109..5a91791c 100644 --- a/optd-sqlplannertest/tests/joins/join_enumerate.yml +++ b/optd-sqlplannertest/tests/joins/join_enumerate.yml @@ -32,6 +32,6 @@ select * from t1, (select * from t2, t3) where t1v1 = t2v1 and t1v2 = t3v2; desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently) tasks: - - explain[disable_pruning]:logical_join_orders - - explain:logical_join_orders,logical_optd,physical_optd + - explain[disable_pruning]:logical_join_orders,physical_optd + - explain:logical_join_orders,physical_optd - execute diff --git a/optd-sqlplannertest/tests/joins/multi-join.planner.sql b/optd-sqlplannertest/tests/joins/multi-join.planner.sql index ec29505b..d6d87fc4 100644 --- a/optd-sqlplannertest/tests/joins/multi-join.planner.sql +++ b/optd-sqlplannertest/tests/joins/multi-join.planner.sql @@ -21,16 +21,16 @@ LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5 ] } │ └── Eq │ ├── #3 │ └── #4 - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: t1 } │ └── LogicalScan { table: t2 } └── LogicalScan { table: t3 } -PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } -├── PhysicalScan { table: t1 } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: t2 } - └── PhysicalScan { table: t3 } +PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } +├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } +│ ├── PhysicalScan { table: t1 } +│ └── PhysicalScan { table: t2 } +└── PhysicalScan { table: t3 } */ -- test 3-way join @@ -46,8 +46,8 @@ LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5 ] } │ └── Eq │ ├── #1 │ └── #4 - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: t1 } │ └── LogicalScan { table: t2 } └── LogicalScan { table: t3 } @@ -74,19 +74,19 @@ LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7 ] } │ └── Eq │ ├── #5 │ └── #6 - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ ├── LogicalScan { table: t1 } │ │ └── LogicalScan { table: t2 } │ └── LogicalScan { table: t3 } └── LogicalScan { table: t4 } -PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } -├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } -│ ├── PhysicalScan { table: t1 } -│ └── PhysicalScan { table: t2 } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - ├── PhysicalScan { table: t3 } - └── PhysicalScan { table: t4 } +PhysicalHashJoin { join_type: Inner, left_keys: [ #5 ], right_keys: [ #0 ] } +├── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } +│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } +│ │ ├── PhysicalScan { table: t1 } +│ │ └── PhysicalScan { table: t2 } +│ └── PhysicalScan { table: t3 } +└── PhysicalScan { table: t4 } */ diff --git a/optd-sqlplannertest/tests/joins/self-join.planner.sql b/optd-sqlplannertest/tests/joins/self-join.planner.sql index d0c2124f..29b963d1 100644 --- a/optd-sqlplannertest/tests/joins/self-join.planner.sql +++ b/optd-sqlplannertest/tests/joins/self-join.planner.sql @@ -23,7 +23,7 @@ LogicalSort ├── cond:Eq │ ├── #0 │ └── #2 - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t1 } PhysicalSort diff --git a/optd-sqlplannertest/tests/pushdowns/fliter_transpose.planner.sql b/optd-sqlplannertest/tests/pushdowns/fliter_transpose.planner.sql index 4277bd5e..cd279d92 100644 --- a/optd-sqlplannertest/tests/pushdowns/fliter_transpose.planner.sql +++ b/optd-sqlplannertest/tests/pushdowns/fliter_transpose.planner.sql @@ -20,7 +20,7 @@ LogicalProjection { exprs: [ #0, #1, #3 ] } ├── cond:Eq │ ├── #0 │ └── #2 - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } PhysicalProjection { exprs: [ #0, #1, #3 ] } @@ -28,7 +28,7 @@ PhysicalProjection { exprs: [ #0, #1, #3 ] } ├── cond:Eq │ ├── #0 │ └── #2 - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: t1 } └── PhysicalScan { table: t2 } */ @@ -44,7 +44,7 @@ LogicalProjection { exprs: [ #0, #1, #3 ] } ├── cond:Eq │ ├── #0 │ └── #3 - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } PhysicalProjection { exprs: [ #0, #1, #3 ] } @@ -52,7 +52,7 @@ PhysicalProjection { exprs: [ #0, #1, #3 ] } ├── cond:Eq │ ├── #0 │ └── #3 - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: t1 } └── PhysicalScan { table: t2 } */ @@ -69,7 +69,7 @@ LogicalProjection { exprs: [ #0, #1, #2 ] } │ ├── #0 │ └── #2 └── LogicalProjection { exprs: [ #0, #1, #3 ] } - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t1 } └── LogicalScan { table: t2 } PhysicalProjection { exprs: [ #0, #1, #3 ] } @@ -77,7 +77,7 @@ PhysicalProjection { exprs: [ #0, #1, #3 ] } ├── cond:Eq │ ├── #0 │ └── #3 - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: t1 } └── PhysicalScan { table: t2 } */ diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index 93f180ce..45068554 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -62,35 +62,36 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── LogicalAgg { exprs: [], groups: [ #0 ] } │ └── LogicalScan { table: t1 } └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033003,io=4000}, stat: {row_cnt=1} } -└── PhysicalFilter - ├── cond:Gt - │ ├── #4 - │ └── 100(i64) - ├── cost: {compute=4033000,io=4000} - ├── stat: {row_cnt=1} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } - ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── cost: {compute=4018000,io=3000} - ├── stat: {row_cnt=10000} - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - ├── cost: {compute=14000,io=2000} - ├── stat: {row_cnt=1000} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033008,io=4000}, stat: {row_cnt=1} } +└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4033005,io=4000}, stat: {row_cnt=1} } + └── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=4033000,io=4000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4018000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=14000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries with (scalar op group agg) @@ -167,52 +168,53 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── LogicalAgg { exprs: [], groups: [ #0 ] } │ └── LogicalScan { table: t1 } └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228003,io=5000}, stat: {row_cnt=1} } -└── PhysicalFilter - ├── cond:Gt - │ ├── #4 - │ └── 100(i64) - ├── cost: {compute=44228000,io=5000} - ├── stat: {row_cnt=1} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=44225000,io=5000}, stat: {row_cnt=1000} } - ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── cost: {compute=44123000,io=4000} - ├── stat: {row_cnt=100000} - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ #2 ] - ├── groups: [ #0 ] - ├── cost: {compute=4119000,io=3000} - ├── stat: {row_cnt=10000} - └── PhysicalProjection { exprs: [ #0, #2, #3 ], cost: {compute=4059000,io=3000}, stat: {row_cnt=10000} } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── cost: {compute=4019000,io=3000} - ├── stat: {row_cnt=10000} - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0, #1 ] - ├── cost: {compute=15000,io=2000} - ├── stat: {row_cnt=1000} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228008,io=5000}, stat: {row_cnt=1} } +└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=44228005,io=5000}, stat: {row_cnt=1} } + └── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=44228000,io=5000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=44225000,io=5000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=44123000,io=4000} + ├── stat: {row_cnt=100000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0 ] + ├── cost: {compute=4119000,io=3000} + ├── stat: {row_cnt=10000} + └── PhysicalProjection { exprs: [ #0, #2, #3 ], cost: {compute=4059000,io=3000}, stat: {row_cnt=10000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4019000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0, #1 ] + ├── cost: {compute=15000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries with scalar agg in select list @@ -262,29 +264,30 @@ LogicalProjection { exprs: [ #0, #2 ] } ├── LogicalAgg { exprs: [], groups: [ #0 ] } │ └── LogicalScan { table: t1 } └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #0, #4 ], cost: {compute=4033000,io=4000}, stat: {row_cnt=1000} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } - ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── cost: {compute=4018000,io=3000} - ├── stat: {row_cnt=10000} - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - ├── cost: {compute=14000,io=2000} - ├── stat: {row_cnt=1000} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } +PhysicalProjection { exprs: [ #0, #3 ], cost: {compute=4038000,io=4000}, stat: {row_cnt=1000} } +└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4035000,io=4000}, stat: {row_cnt=1000} } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4018000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=14000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries. @@ -312,7 +315,7 @@ LogicalProjection { exprs: [ #0, #1 ] } │ └── Eq │ ├── #1 │ └── #2 - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t2 } └── LogicalScan { table: t3 } LogicalProjection { exprs: [ #0, #1 ] } @@ -353,39 +356,40 @@ LogicalProjection { exprs: [ #0, #1 ] } └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalAgg { exprs: [], groups: [ #0 ] } │ └── LogicalScan { table: t1 } - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: t2 } └── LogicalScan { table: t3 } -PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036003,io=5000}, stat: {row_cnt=1} } -└── PhysicalFilter - ├── cond:Gt - │ ├── #4 - │ └── 100(i64) - ├── cost: {compute=4036000,io=5000} - ├── stat: {row_cnt=1} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4033000,io=5000}, stat: {row_cnt=1000} } - ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── cost: {compute=4021000,io=4000} - ├── stat: {row_cnt=10000} - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - ├── cost: {compute=17000,io=3000} - ├── stat: {row_cnt=1000} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036008,io=5000}, stat: {row_cnt=1} } +└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4036005,io=5000}, stat: {row_cnt=1} } + └── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=4036000,io=5000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4033000,io=5000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4021000,io=4000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=17000,io=3000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ], cost: {compute=3000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ diff --git a/optd-sqlplannertest/tests/tpch/q10.planner.sql b/optd-sqlplannertest/tests/tpch/q10.planner.sql index 012a0114..a3814a94 100644 --- a/optd-sqlplannertest/tests/tpch/q10.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q10.planner.sql @@ -68,9 +68,9 @@ LogicalLimit { skip: 0(i64), fetch: 20(i64) } │ └── Eq │ ├── #3 │ └── #33 - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ ├── LogicalScan { table: customer } │ │ └── LogicalScan { table: orders } │ └── LogicalScan { table: lineitem } @@ -89,26 +89,26 @@ PhysicalLimit { skip: 0(i64), fetch: 20(i64) } │ └── #23 ├── groups: [ #0, #1, #5, #4, #34, #2, #7 ] └── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } - ├── PhysicalProjection { exprs: [ #25, #26, #27, #28, #29, #30, #31, #32, #16, #17, #18, #19, #20, #21, #22, #23, #24, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalFilter - │ │ ├── cond:Eq - │ │ │ ├── #8 - │ │ │ └── "R" - │ │ └── PhysicalScan { table: lineitem } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ ├── PhysicalFilter - │ │ ├── cond:And - │ │ │ ├── Geq - │ │ │ │ ├── #4 - │ │ │ │ └── Cast { cast_to: Date32, child: "1993-07-01" } - │ │ │ └── Lt - │ │ │ ├── #4 - │ │ │ └── Add - │ │ │ ├── Cast { cast_to: Date32, child: "1993-07-01" } - │ │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) - │ │ └── PhysicalScan { table: orders } - │ └── PhysicalScan { table: customer } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #8 ], right_keys: [ #0 ] } + │ ├── PhysicalProjection { exprs: [ #9, #10, #11, #12, #13, #14, #15, #16, #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ │ ├── PhysicalFilter + │ │ │ ├── cond:And + │ │ │ │ ├── Geq + │ │ │ │ │ ├── #4 + │ │ │ │ │ └── Cast { cast_to: Date32, child: "1993-07-01" } + │ │ │ │ └── Lt + │ │ │ │ ├── #4 + │ │ │ │ └── Add + │ │ │ │ ├── Cast { cast_to: Date32, child: "1993-07-01" } + │ │ │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ │ │ └── PhysicalScan { table: orders } + │ │ └── PhysicalScan { table: customer } + │ └── PhysicalFilter + │ ├── cond:Eq + │ │ ├── #8 + │ │ └── "R" + │ └── PhysicalScan { table: lineitem } └── PhysicalScan { table: nation } */ diff --git a/optd-sqlplannertest/tests/tpch/q11.planner.sql b/optd-sqlplannertest/tests/tpch/q11.planner.sql index 3e31bceb..2b654217 100644 --- a/optd-sqlplannertest/tests/tpch/q11.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q11.planner.sql @@ -54,8 +54,8 @@ LogicalSort │ │ └── Eq │ │ ├── #13 │ │ └── "CHINA" - │ └── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } + │ └── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ ├── LogicalScan { table: partsupp } │ │ └── LogicalScan { table: supplier } │ └── LogicalScan { table: nation } @@ -83,58 +83,58 @@ LogicalSort │ └── Eq │ ├── #13 │ └── "CHINA" - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: partsupp } │ └── LogicalScan { table: supplier } └── LogicalScan { table: nation } PhysicalSort ├── exprs:SortOrder { order: Desc } │ └── #1 -└── PhysicalProjection { exprs: [ #1, #2 ] } +└── PhysicalProjection { exprs: [ #0, #1 ] } └── PhysicalNestedLoopJoin ├── join_type: Inner ├── cond:Gt - │ ├── Cast { cast_to: Decimal128(38, 15), child: #2 } - │ └── #0 - ├── PhysicalProjection - │ ├── exprs:Cast - │ │ ├── cast_to: Decimal128(38, 15) - │ │ ├── child:Mul - │ │ │ ├── Cast { cast_to: Float64, child: #0 } - │ │ │ └── 0.0001(float) + │ ├── Cast { cast_to: Decimal128(38, 15), child: #1 } + │ └── #2 + ├── PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── Mul + │ │ ├── #3 + │ │ └── Cast { cast_to: Decimal128(10, 0), child: #2 } + │ ├── groups: [ #0 ] + │ └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] } + │ │ ├── PhysicalFilter + │ │ │ ├── cond:Eq + │ │ │ │ ├── #1 + │ │ │ │ └── "CHINA" + │ │ │ └── PhysicalScan { table: nation } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalScan { table: partsupp } + └── PhysicalProjection + ├── exprs:Cast + │ ├── cast_to: Decimal128(38, 15) + │ ├── child:Mul + │ │ ├── Cast { cast_to: Float64, child: #0 } + │ │ └── 0.0001(float) - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── Mul - │ │ ├── #3 - │ │ └── Cast { cast_to: Decimal128(10, 0), child: #2 } - │ ├── groups: [] - │ └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] } - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] } - │ │ ├── PhysicalFilter - │ │ │ ├── cond:Eq - │ │ │ │ ├── #1 - │ │ │ │ └── "CHINA" - │ │ │ └── PhysicalScan { table: nation } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: partsupp } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── Mul - │ ├── #3 - │ └── Cast { cast_to: Decimal128(10, 0), child: #2 } - ├── groups: [ #0 ] - └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] } - │ ├── PhysicalFilter - │ │ ├── cond:Eq - │ │ │ ├── #1 - │ │ │ └── "CHINA" - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: supplier } - └── PhysicalScan { table: partsupp } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── Mul + │ ├── #3 + │ └── Cast { cast_to: Decimal128(10, 0), child: #2 } + ├── groups: [] + └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] } + │ ├── PhysicalFilter + │ │ ├── cond:Eq + │ │ │ ├── #1 + │ │ │ └── "CHINA" + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: supplier } + └── PhysicalScan { table: partsupp } */ diff --git a/optd-sqlplannertest/tests/tpch/q12.planner.sql b/optd-sqlplannertest/tests/tpch/q12.planner.sql index 02b122d6..e1c6f934 100644 --- a/optd-sqlplannertest/tests/tpch/q12.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q12.planner.sql @@ -72,7 +72,7 @@ LogicalSort │ └── Lt │ ├── #21 │ └── Cast { cast_to: Date32, child: "1995-01-01" } - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: orders } └── LogicalScan { table: lineitem } PhysicalSort diff --git a/optd-sqlplannertest/tests/tpch/q14.planner.sql b/optd-sqlplannertest/tests/tpch/q14.planner.sql index cc3fac47..1d8fd0fb 100644 --- a/optd-sqlplannertest/tests/tpch/q14.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q14.planner.sql @@ -50,7 +50,7 @@ LogicalProjection │ └── Add │ ├── Cast { cast_to: Date32, child: "1995-09-01" } │ └── INTERVAL_MONTH_DAY_NANO (1, 0, 0) - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: lineitem } └── LogicalScan { table: part } PhysicalProjection diff --git a/optd-sqlplannertest/tests/tpch/q15.planner.sql b/optd-sqlplannertest/tests/tpch/q15.planner.sql index d66bc87d..9f1b34fe 100644 --- a/optd-sqlplannertest/tests/tpch/q15.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q15.planner.sql @@ -47,7 +47,7 @@ LogicalSort │ ├── #8 │ └── #9 └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [] } - ├── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: supplier } │ └── LogicalProjection { exprs: [ #0, #1 ] } │ └── LogicalProjection { exprs: [ #0, #1 ] } @@ -99,29 +99,30 @@ LogicalSort PhysicalSort ├── exprs:SortOrder { order: Asc } │ └── #0 -└── PhysicalProjection { exprs: [ #2, #3, #4, #6, #1 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ ├── PhysicalAgg - │ │ ├── aggrs:Agg(Sum) - │ │ │ └── Mul - │ │ │ ├── #5 - │ │ │ └── Sub - │ │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } - │ │ │ └── #6 - │ │ ├── groups: [ #2 ] - │ │ └── PhysicalFilter - │ │ ├── cond:And - │ │ │ ├── Geq - │ │ │ │ ├── #10 - │ │ │ │ └── Cast { cast_to: Date32, child: "1993-01-01" } - │ │ │ └── Lt - │ │ │ ├── #10 - │ │ │ └── Add - │ │ │ ├── Cast { cast_to: Date32, child: "1993-01-01" } - │ │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) - │ │ └── PhysicalScan { table: lineitem } - │ └── PhysicalScan { table: supplier } +└── PhysicalProjection { exprs: [ #0, #1, #2, #4, #8 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #8 ], right_keys: [ #0 ] } + ├── PhysicalProjection { exprs: [ #2, #3, #4, #5, #6, #7, #8, #0, #1 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalAgg + │ │ ├── aggrs:Agg(Sum) + │ │ │ └── Mul + │ │ │ ├── #5 + │ │ │ └── Sub + │ │ │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } + │ │ │ └── #6 + │ │ ├── groups: [ #2 ] + │ │ └── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Geq + │ │ │ │ ├── #10 + │ │ │ │ └── Cast { cast_to: Date32, child: "1993-01-01" } + │ │ │ └── Lt + │ │ │ ├── #10 + │ │ │ └── Add + │ │ │ ├── Cast { cast_to: Date32, child: "1993-01-01" } + │ │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ │ └── PhysicalScan { table: lineitem } + │ └── PhysicalScan { table: supplier } └── PhysicalAgg ├── aggrs:Agg(Max) │ └── [ #1 ] diff --git a/optd-sqlplannertest/tests/tpch/q16.planner.sql b/optd-sqlplannertest/tests/tpch/q16.planner.sql index b1adf6f6..ca5fdb66 100644 --- a/optd-sqlplannertest/tests/tpch/q16.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q16.planner.sql @@ -59,7 +59,7 @@ LogicalSort │ └── Not │ └── [ #14 ] └── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(1)) }, op: Eq }, cond: true, extern_cols: [] } - ├── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: partsupp } │ └── LogicalScan { table: part } └── LogicalProjection { exprs: [ #0 ] } @@ -96,7 +96,7 @@ PhysicalSort ├── cond:Eq │ ├── #1 │ └── #14 - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } │ ├── PhysicalScan { table: partsupp } │ └── PhysicalScan { table: part } └── PhysicalProjection { exprs: [ #0 ] } diff --git a/optd-sqlplannertest/tests/tpch/q17.planner.sql b/optd-sqlplannertest/tests/tpch/q17.planner.sql index c2aa8009..0f9b7e96 100644 --- a/optd-sqlplannertest/tests/tpch/q17.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q17.planner.sql @@ -44,7 +44,7 @@ LogicalProjection │ ├── Cast { cast_to: Decimal128(30, 15), child: #4 } │ └── #25 └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#16) ] } - ├── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: lineitem } │ └── LogicalScan { table: part } └── LogicalProjection @@ -74,56 +74,58 @@ PhysicalProjection ├── aggrs:Agg(Sum) │ └── [ #5 ] ├── groups: [] - └── PhysicalProjection { exprs: [ #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24, #0, #1, #2, #3, #4, #5, #6, #7, #8, #26 ] } - └── PhysicalNestedLoopJoin - ├── join_type: Inner - ├── cond:And - │ ├── Lt - │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #13 } - │ │ └── #26 - │ └── Eq - │ ├── #0 - │ └── #25 - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - │ ├── PhysicalFilter - │ │ ├── cond:And - │ │ │ ├── Eq - │ │ │ │ ├── #3 - │ │ │ │ └── "Brand#13" - │ │ │ └── Eq - │ │ │ ├── #6 - │ │ │ └── "JUMBO PKG" - │ │ └── PhysicalScan { table: part } - │ └── PhysicalScan { table: lineitem } - └── PhysicalProjection - ├── exprs: - │ ┌── #0 - │ └── Cast - │ ├── cast_to: Decimal128(30, 15) - │ ├── child:Mul - │ │ ├── 0.2(float) - │ │ └── Cast { cast_to: Float64, child: #1 } + └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24, #26 ] } + └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24, #25, #26, #2, #3, #4, #5, #6, #7, #8, #9, #10, #0, #1 ] } + └── PhysicalNestedLoopJoin + ├── join_type: Inner + ├── cond:And + │ ├── Eq + │ │ ├── #2 + │ │ └── #12 + │ └── Lt + │ ├── Cast { cast_to: Decimal128(30, 15), child: #15 } + │ └── #1 + ├── PhysicalProjection { exprs: [ #9, #10, #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ ├── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Eq + │ │ │ │ ├── #3 + │ │ │ │ └── "Brand#13" + │ │ │ └── Eq + │ │ │ ├── #6 + │ │ │ └── "JUMBO PKG" + │ │ └── PhysicalScan { table: part } + │ └── PhysicalProjection + │ ├── exprs: + │ │ ┌── #0 + │ │ └── Cast + │ │ ├── cast_to: Decimal128(30, 15) + │ │ ├── child:Mul + │ │ │ ├── 0.2(float) + │ │ │ └── Cast { cast_to: Float64, child: #1 } - └── PhysicalProjection { exprs: [ #0, #2 ] } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── PhysicalAgg { aggrs: [], groups: [ #16 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalScan { table: lineitem } - │ └── PhysicalScan { table: part } - └── PhysicalAgg - ├── aggrs:Agg(Avg) - │ └── [ #5 ] - ├── groups: [ #0 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - ├── PhysicalAgg { aggrs: [], groups: [ #16 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalScan { table: lineitem } - │ └── PhysicalScan { table: part } - └── PhysicalScan { table: lineitem } + │ └── PhysicalProjection { exprs: [ #0, #2 ] } + │ └── PhysicalNestedLoopJoin + │ ├── join_type: LeftOuter + │ ├── cond:And + │ │ └── Eq + │ │ ├── #0 + │ │ └── #1 + │ ├── PhysicalAgg { aggrs: [], groups: [ #16 ] } + │ │ └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalScan { table: part } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Avg) + │ │ └── [ #5 ] + │ ├── groups: [ #0 ] + │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalAgg { aggrs: [], groups: [ #16 ] } + │ │ └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ ├── PhysicalScan { table: lineitem } + │ │ └── PhysicalScan { table: part } + │ └── PhysicalScan { table: lineitem } + └── PhysicalScan { table: lineitem } */ diff --git a/optd-sqlplannertest/tests/tpch/q19.planner.sql b/optd-sqlplannertest/tests/tpch/q19.planner.sql index 5178f9d0..ef2dcef4 100644 --- a/optd-sqlplannertest/tests/tpch/q19.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q19.planner.sql @@ -100,7 +100,7 @@ LogicalProjection { exprs: [ #0 ] } │ └── Eq │ ├── #13 │ └── "DELIVER IN PERSON" - └── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalScan { table: lineitem } └── LogicalScan { table: part } PhysicalAgg @@ -170,7 +170,7 @@ PhysicalAgg │ └── Eq │ ├── #13 │ └── "DELIVER IN PERSON" - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } ├── PhysicalScan { table: lineitem } └── PhysicalScan { table: part } */ diff --git a/optd-sqlplannertest/tests/tpch/q2.planner.sql b/optd-sqlplannertest/tests/tpch/q2.planner.sql index 90fb0d2c..6af79d8d 100644 --- a/optd-sqlplannertest/tests/tpch/q2.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q2.planner.sql @@ -82,10 +82,10 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } │ ├── #19 │ └── #28 └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ ├── LogicalScan { table: part } │ │ │ │ └── LogicalScan { table: supplier } │ │ │ └── LogicalScan { table: partsupp } @@ -113,9 +113,9 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } │ └── Eq │ ├── #17 │ └── "AFRICA" - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ ├── LogicalScan { table: partsupp } │ │ └── LogicalScan { table: supplier } │ └── LogicalScan { table: nation } @@ -162,10 +162,10 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } ├── cond:Eq │ ├── #0 │ └── #28 - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ ├── LogicalScan { table: part } │ │ │ │ └── LogicalScan { table: supplier } │ │ │ └── LogicalScan { table: partsupp } @@ -180,10 +180,10 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } │ ├── #0 │ └── #1 ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ └── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ ├── LogicalScan { table: part } │ │ │ │ └── LogicalScan { table: supplier } │ │ │ └── LogicalScan { table: partsupp } @@ -212,18 +212,18 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } │ └── "AFRICA" └── LogicalJoin { join_type: Inner, cond: true } ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ └── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ ├── LogicalScan { table: part } │ │ │ │ └── LogicalScan { table: supplier } │ │ │ └── LogicalScan { table: partsupp } │ │ └── LogicalScan { table: nation } │ └── LogicalScan { table: region } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ ├── LogicalScan { table: partsupp } │ │ └── LogicalScan { table: supplier } │ └── LogicalScan { table: nation } @@ -239,81 +239,69 @@ PhysicalLimit { skip: 0(i64), fetch: 100(i64) } │ │ └── #1 │ └── SortOrder { order: Asc } │ └── #3 - └── PhysicalProjection { exprs: [ #21, #17, #4, #7, #9, #18, #20, #22 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #26, #7 ], right_keys: [ #2, #0 ] } - ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #7, #16 ], right_keys: [ #0, #1 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #12 ] } - │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } - │ │ │ ├── PhysicalFilter - │ │ │ │ ├── cond:Eq - │ │ │ │ │ ├── #1 - │ │ │ │ │ └── "AFRICA" - │ │ │ │ └── PhysicalScan { table: region } - │ │ │ └── PhysicalScan { table: nation } - │ │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalFilter - │ │ │ ├── cond:And - │ │ │ │ ├── Eq - │ │ │ │ │ ├── Cast { cast_to: Int64, child: #5 } - │ │ │ │ │ └── 4(i64) - │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false } - │ │ │ └── PhysicalScan { table: part } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: partsupp } - └── PhysicalNestedLoopJoin - ├── join_type: LeftOuter - ├── cond:And - │ └── Eq - │ ├── #0 - │ └── #1 - ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: region } - └── PhysicalAgg - ├── aggrs:Agg(Min) - │ └── [ #4 ] - ├── groups: [ #0 ] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #1 - │ ├── Eq - │ │ ├── #6 - │ │ └── #2 - │ ├── Eq - │ │ ├── #9 - │ │ └── #13 - │ ├── Eq - │ │ ├── #15 - │ │ └── #17 - │ └── Eq - │ ├── #18 - │ └── "AFRICA" - └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + └── PhysicalProjection { exprs: [ #14, #10, #22, #0, #2, #11, #13, #15 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #0 ], right_keys: [ #1, #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #23 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } + │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #9 ], right_keys: [ #0, #1 ] } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ │ │ ├── PhysicalFilter + │ │ │ │ │ ├── cond:And + │ │ │ │ │ │ ├── Eq + │ │ │ │ │ │ │ ├── Cast { cast_to: Int64, child: #5 } + │ │ │ │ │ │ │ └── 4(i64) + │ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false } + │ │ │ │ │ └── PhysicalScan { table: part } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalFilter + │ ├── cond:Eq + │ │ ├── #1 + │ │ └── "AFRICA" + │ └── PhysicalScan { table: region } + └── PhysicalProjection { exprs: [ #0, #2 ] } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ │ │ ├── PhysicalScan { table: part } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: region } + └── PhysicalAgg + ├── aggrs:Agg(Min) + │ └── [ #4 ] + ├── groups: [ #0 ] + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } │ │ │ │ ├── PhysicalScan { table: part } │ │ │ │ └── PhysicalScan { table: supplier } │ │ │ └── PhysicalScan { table: partsupp } │ │ └── PhysicalScan { table: nation } │ └── PhysicalScan { table: region } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #14 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #8 ], right_keys: [ #0 ] } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } │ │ ├── PhysicalScan { table: partsupp } │ │ └── PhysicalScan { table: supplier } │ └── PhysicalScan { table: nation } - └── PhysicalScan { table: region } + └── PhysicalFilter + ├── cond:Eq + │ ├── #1 + │ └── "AFRICA" + └── PhysicalScan { table: region } */ diff --git a/optd-sqlplannertest/tests/tpch/q20.planner.sql b/optd-sqlplannertest/tests/tpch/q20.planner.sql index e5e6065d..2c1aabff 100644 --- a/optd-sqlplannertest/tests/tpch/q20.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q20.planner.sql @@ -52,7 +52,7 @@ LogicalSort │ ├── #8 │ └── "IRAQ" └── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(0)) }, op: Eq }, cond: true, extern_cols: [] } - ├── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: supplier } │ └── LogicalScan { table: nation } └── LogicalProjection { exprs: [ #1 ] } @@ -111,84 +111,90 @@ PhysicalSort ├── cond:Eq │ ├── #0 │ └── #11 - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } │ ├── PhysicalScan { table: supplier } │ └── PhysicalScan { table: nation } - └── PhysicalProjection { exprs: [ #4 ] } - └── PhysicalFilter + └── PhysicalProjection { exprs: [ #1 ] } + └── PhysicalNestedLoopJoin + ├── join_type: Inner ├── cond:And - │ ├── #8 - │ └── Gt - │ ├── Cast { cast_to: Float64, child: #5 } - │ └── #2 - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #1 ], right_keys: [ #0, #1 ] } - ├── PhysicalProjection - │ ├── exprs: - │ │ ┌── #0 - │ │ ├── #1 - │ │ └── Mul - │ │ ├── 0.5(float) - │ │ └── Cast { cast_to: Float64, child: #2 } - │ └── PhysicalProjection { exprs: [ #0, #1, #4 ] } - │ └── PhysicalNestedLoopJoin - │ ├── join_type: LeftOuter - │ ├── cond:And - │ │ ├── Eq - │ │ │ ├── #0 - │ │ │ └── #2 - │ │ └── Eq - │ │ ├── #1 - │ │ └── #3 - │ ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } - │ │ └── PhysicalNestedLoopJoin - │ │ ├── join_type: LeftMark - │ │ ├── cond:Eq - │ │ │ ├── #0 - │ │ │ └── #5 - │ │ ├── PhysicalScan { table: partsupp } - │ │ └── PhysicalProjection { exprs: [ #0 ] } - │ │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } - │ │ └── PhysicalScan { table: part } - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ #6 ] - │ ├── groups: [ #0, #1 ] - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Eq - │ │ │ ├── #3 - │ │ │ └── #0 - │ │ ├── Eq - │ │ │ ├── #4 - │ │ │ └── #1 - │ │ ├── Geq - │ │ │ ├── #12 - │ │ │ └── Cast { cast_to: Date32, child: "1996-01-01" } - │ │ └── Lt - │ │ ├── #12 - │ │ └── Add - │ │ ├── Cast { cast_to: Date32, child: "1996-01-01" } - │ │ └── INTERVAL_MONTH_DAY_NANO (12, 0, 0) - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } - │ │ └── PhysicalNestedLoopJoin - │ │ ├── join_type: LeftMark - │ │ ├── cond:Eq - │ │ │ ├── #0 - │ │ │ └── #5 - │ │ ├── PhysicalScan { table: partsupp } - │ │ └── PhysicalProjection { exprs: [ #0 ] } - │ │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } - │ │ └── PhysicalScan { table: part } - │ └── PhysicalScan { table: lineitem } - └── PhysicalNestedLoopJoin - ├── join_type: LeftMark - ├── cond:Eq - │ ├── #0 - │ └── #5 - ├── PhysicalScan { table: partsupp } - └── PhysicalProjection { exprs: [ #0 ] } - └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } - └── PhysicalScan { table: part } + │ ├── Gt + │ │ ├── Cast { cast_to: Float64, child: #2 } + │ │ └── #8 + │ ├── Eq + │ │ ├── #0 + │ │ └── #6 + │ └── Eq + │ ├── #1 + │ └── #7 + ├── PhysicalFilter { cond: #5 } + │ └── PhysicalNestedLoopJoin + │ ├── join_type: LeftMark + │ ├── cond:Eq + │ │ ├── #0 + │ │ └── #5 + │ ├── PhysicalScan { table: partsupp } + │ └── PhysicalProjection { exprs: [ #0 ] } + │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ └── PhysicalScan { table: part } + └── PhysicalProjection + ├── exprs: + │ ┌── #0 + │ ├── #1 + │ └── Mul + │ ├── 0.5(float) + │ └── Cast { cast_to: Float64, child: #2 } + └── PhysicalProjection { exprs: [ #0, #1, #4 ] } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #2 + │ └── Eq + │ ├── #1 + │ └── #3 + ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } + │ └── PhysicalNestedLoopJoin + │ ├── join_type: LeftMark + │ ├── cond:Eq + │ │ ├── #0 + │ │ └── #5 + │ ├── PhysicalScan { table: partsupp } + │ └── PhysicalProjection { exprs: [ #0 ] } + │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ └── PhysicalScan { table: part } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ #6 ] + ├── groups: [ #0, #1 ] + └── PhysicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #3 + │ │ └── #0 + │ ├── Eq + │ │ ├── #4 + │ │ └── #1 + │ ├── Geq + │ │ ├── #12 + │ │ └── Cast { cast_to: Date32, child: "1996-01-01" } + │ └── Lt + │ ├── #12 + │ └── Add + │ ├── Cast { cast_to: Date32, child: "1996-01-01" } + │ └── INTERVAL_MONTH_DAY_NANO (12, 0, 0) + └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } + │ └── PhysicalNestedLoopJoin + │ ├── join_type: LeftMark + │ ├── cond:Eq + │ │ ├── #0 + │ │ └── #5 + │ ├── PhysicalScan { table: partsupp } + │ └── PhysicalProjection { exprs: [ #0 ] } + │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ └── PhysicalScan { table: part } + └── PhysicalScan { table: lineitem } */ diff --git a/optd-sqlplannertest/tests/tpch/q22.planner.sql b/optd-sqlplannertest/tests/tpch/q22.planner.sql index 89f93534..ebd4253b 100644 --- a/optd-sqlplannertest/tests/tpch/q22.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q22.planner.sql @@ -127,44 +127,46 @@ PhysicalSort │ └── Eq │ ├── #0 │ └── #9 - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalScan { table: customer } - │ └── PhysicalAgg - │ ├── aggrs:Agg(Avg) - │ │ └── [ #5 ] - │ ├── groups: [] - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Gt - │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } - │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } - │ │ └── InList - │ │ ├── expr:Scalar(Substr) - │ │ │ └── [ #4, 1(i64), 2(i64) ] - │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] - │ │ ├── negated: false + ├── PhysicalProjection { exprs: [ #1, #2, #3, #4, #5, #6, #7, #8, #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ ├── PhysicalAgg + │ │ ├── aggrs:Agg(Avg) + │ │ │ └── [ #5 ] + │ │ ├── groups: [] + │ │ └── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Gt + │ │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ │ └── InList + │ │ │ ├── expr:Scalar(Substr) + │ │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ │ ├── negated: false - │ └── PhysicalScan { table: customer } + │ │ └── PhysicalScan { table: customer } + │ └── PhysicalScan { table: customer } └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalScan { table: customer } - │ └── PhysicalAgg - │ ├── aggrs:Agg(Avg) - │ │ └── [ #5 ] - │ ├── groups: [] - │ └── PhysicalFilter - │ ├── cond:And - │ │ ├── Gt - │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } - │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } - │ │ └── InList - │ │ ├── expr:Scalar(Substr) - │ │ │ └── [ #4, 1(i64), 2(i64) ] - │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] - │ │ ├── negated: false + │ └── PhysicalProjection { exprs: [ #1, #2, #3, #4, #5, #6, #7, #8, #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + │ ├── PhysicalAgg + │ │ ├── aggrs:Agg(Avg) + │ │ │ └── [ #5 ] + │ │ ├── groups: [] + │ │ └── PhysicalFilter + │ │ ├── cond:And + │ │ │ ├── Gt + │ │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ │ └── InList + │ │ │ ├── expr:Scalar(Substr) + │ │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ │ ├── negated: false - │ └── PhysicalScan { table: customer } + │ │ └── PhysicalScan { table: customer } + │ └── PhysicalScan { table: customer } └── PhysicalScan { table: orders } */ diff --git a/optd-sqlplannertest/tests/tpch/q3.planner.sql b/optd-sqlplannertest/tests/tpch/q3.planner.sql index 9c7e0bbc..63f44707 100644 --- a/optd-sqlplannertest/tests/tpch/q3.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q3.planner.sql @@ -56,8 +56,8 @@ LogicalLimit { skip: 0(i64), fetch: 10(i64) } │ └── Gt │ ├── #27 │ └── Cast { cast_to: Date32, child: "1995-03-29" } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } │ ├── LogicalScan { table: customer } │ └── LogicalScan { table: orders } └── LogicalScan { table: lineitem } @@ -77,22 +77,22 @@ PhysicalLimit { skip: 0(i64), fetch: 10(i64) } │ ├── Cast { cast_to: Decimal128(20, 0), child: 1(i64) } │ └── #23 ├── groups: [ #17, #12, #15 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } - ├── PhysicalFilter - │ ├── cond:Eq - │ │ ├── #6 - │ │ └── "FURNITURE" - │ └── PhysicalScan { table: customer } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - ├── PhysicalFilter - │ ├── cond:Lt - │ │ ├── #4 - │ │ └── Cast { cast_to: Date32, child: "1995-03-29" } - │ └── PhysicalScan { table: orders } - └── PhysicalFilter - ├── cond:Gt - │ ├── #10 - │ └── Cast { cast_to: Date32, child: "1995-03-29" } - └── PhysicalScan { table: lineitem } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #8 ], right_keys: [ #0 ] } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + │ ├── PhysicalFilter + │ │ ├── cond:Eq + │ │ │ ├── #6 + │ │ │ └── "FURNITURE" + │ │ └── PhysicalScan { table: customer } + │ └── PhysicalFilter + │ ├── cond:Lt + │ │ ├── #4 + │ │ └── Cast { cast_to: Date32, child: "1995-03-29" } + │ └── PhysicalScan { table: orders } + └── PhysicalFilter + ├── cond:Gt + │ ├── #10 + │ └── Cast { cast_to: Date32, child: "1995-03-29" } + └── PhysicalScan { table: lineitem } */ diff --git a/optd-sqlplannertest/tests/tpch/q5.planner.sql b/optd-sqlplannertest/tests/tpch/q5.planner.sql index 227a03a7..dadb2e67 100644 --- a/optd-sqlplannertest/tests/tpch/q5.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q5.planner.sql @@ -66,11 +66,11 @@ LogicalSort │ └── Lt │ ├── #12 │ └── Cast { cast_to: Date32, child: "2024-01-01" } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ ├── LogicalScan { table: customer } │ │ │ │ └── LogicalScan { table: orders } │ │ │ └── LogicalScan { table: lineitem } @@ -91,20 +91,20 @@ PhysicalSort └── PhysicalHashJoin { join_type: Inner, left_keys: [ #42 ], right_keys: [ #0 ] } ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #36 ], right_keys: [ #0 ] } │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] } - │ │ ├── PhysicalProjection { exprs: [ #25, #26, #27, #28, #29, #30, #31, #32, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24 ] } - │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ │ │ ├── PhysicalFilter - │ │ │ │ │ ├── cond:And - │ │ │ │ │ │ ├── Geq - │ │ │ │ │ │ │ ├── #4 - │ │ │ │ │ │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } - │ │ │ │ │ │ └── Lt - │ │ │ │ │ │ ├── #4 - │ │ │ │ │ │ └── Cast { cast_to: Date32, child: "2024-01-01" } - │ │ │ │ │ └── PhysicalScan { table: orders } - │ │ │ │ └── PhysicalScan { table: lineitem } - │ │ │ └── PhysicalScan { table: customer } + │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #8 ], right_keys: [ #0 ] } + │ │ │ ├── PhysicalProjection { exprs: [ #9, #10, #11, #12, #13, #14, #15, #16, #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + │ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ │ │ │ ├── PhysicalFilter + │ │ │ │ │ ├── cond:And + │ │ │ │ │ │ ├── Geq + │ │ │ │ │ │ │ ├── #4 + │ │ │ │ │ │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } + │ │ │ │ │ │ └── Lt + │ │ │ │ │ │ ├── #4 + │ │ │ │ │ │ └── Cast { cast_to: Date32, child: "2024-01-01" } + │ │ │ │ │ └── PhysicalScan { table: orders } + │ │ │ │ └── PhysicalScan { table: customer } + │ │ │ └── PhysicalScan { table: lineitem } │ │ └── PhysicalScan { table: supplier } │ └── PhysicalScan { table: nation } └── PhysicalFilter diff --git a/optd-sqlplannertest/tests/tpch/q7.planner.sql b/optd-sqlplannertest/tests/tpch/q7.planner.sql index 2bf8f752..10687b7d 100644 --- a/optd-sqlplannertest/tests/tpch/q7.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q7.planner.sql @@ -97,11 +97,11 @@ LogicalSort │ │ ├── #45 │ │ └── "FRANCE" │ └── Between { child: #17, lower: Cast { cast_to: Date32, child: "1995-01-01" }, upper: Cast { cast_to: Date32, child: "1996-12-31" } } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ ├── LogicalScan { table: supplier } │ │ │ │ └── LogicalScan { table: lineitem } │ │ │ └── LogicalScan { table: orders } @@ -154,13 +154,13 @@ PhysicalSort │ └── "FRANCE" ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] } │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #24 ], right_keys: [ #0 ] } - │ │ ├── PhysicalProjection { exprs: [ #16, #17, #18, #19, #20, #21, #22, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #23, #24, #25, #26, #27, #28, #29, #30, #31 ] } - │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } - │ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } - │ │ │ │ ├── PhysicalFilter { cond: Between { child: #10, lower: Cast { cast_to: Date32, child: "1995-01-01" }, upper: Cast { cast_to: Date32, child: "1996-12-31" } } } - │ │ │ │ │ └── PhysicalScan { table: lineitem } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: orders } + │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #7 ], right_keys: [ #0 ] } + │ │ │ ├── PhysicalProjection { exprs: [ #16, #17, #18, #19, #20, #21, #22, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15 ] } + │ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + │ │ │ │ ├── PhysicalFilter { cond: Between { child: #10, lower: Cast { cast_to: Date32, child: "1995-01-01" }, upper: Cast { cast_to: Date32, child: "1996-12-31" } } } + │ │ │ │ │ └── PhysicalScan { table: lineitem } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: orders } │ │ └── PhysicalScan { table: customer } │ └── PhysicalScan { table: nation } └── PhysicalScan { table: nation } diff --git a/optd-sqlplannertest/tests/tpch/q8.planner.sql b/optd-sqlplannertest/tests/tpch/q8.planner.sql index 557fe219..c6decc3f 100644 --- a/optd-sqlplannertest/tests/tpch/q8.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q8.planner.sql @@ -100,13 +100,13 @@ LogicalSort │ └── Eq │ ├── #4 │ └── "ECONOMY ANODIZED STEEL" - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ │ │ ├── LogicalScan { table: part } │ │ │ │ │ │ └── LogicalScan { table: supplier } │ │ │ │ │ └── LogicalScan { table: lineitem } @@ -149,22 +149,24 @@ PhysicalSort │ └── #54 └── PhysicalHashJoin { join_type: Inner, left_keys: [ #51 ], right_keys: [ #0 ] } ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #16 ], right_keys: [ #0 ] } - │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #9 ], right_keys: [ #1, #2 ] } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalFilter - │ │ │ │ │ ├── cond:Eq - │ │ │ │ │ │ ├── #4 - │ │ │ │ │ │ └── "ECONOMY ANODIZED STEEL" - │ │ │ │ │ └── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: lineitem } - │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] } - │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } - │ │ │ ├── PhysicalFilter { cond: Between { child: #4, lower: Cast { cast_to: Date32, child: "1995-01-01" }, upper: Cast { cast_to: Date32, child: "1996-12-31" } } } - │ │ │ │ └── PhysicalScan { table: orders } - │ │ │ └── PhysicalScan { table: customer } - │ │ └── PhysicalScan { table: nation } + │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #44 ], right_keys: [ #0 ] } + │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #33 ], right_keys: [ #0 ] } + │ │ │ ├── PhysicalProjection { exprs: [ #25, #26, #27, #28, #29, #30, #31, #32, #33, #34, #35, #36, #37, #38, #39, #40, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24 ] } + │ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] } + │ │ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] } + │ │ │ │ │ ├── PhysicalProjection { exprs: [ #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24, #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + │ │ │ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + │ │ │ │ │ │ ├── PhysicalFilter { cond: Between { child: #4, lower: Cast { cast_to: Date32, child: "1995-01-01" }, upper: Cast { cast_to: Date32, child: "1996-12-31" } } } + │ │ │ │ │ │ │ └── PhysicalScan { table: orders } + │ │ │ │ │ │ └── PhysicalScan { table: lineitem } + │ │ │ │ │ └── PhysicalFilter + │ │ │ │ │ ├── cond:Eq + │ │ │ │ │ │ ├── #4 + │ │ │ │ │ │ └── "ECONOMY ANODIZED STEEL" + │ │ │ │ │ └── PhysicalScan { table: part } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: customer } + │ │ └── PhysicalScan { table: nation } │ └── PhysicalScan { table: nation } └── PhysicalFilter ├── cond:Eq diff --git a/optd-sqlplannertest/tests/tpch/q9.planner.sql b/optd-sqlplannertest/tests/tpch/q9.planner.sql index 54c12b39..cddaed28 100644 --- a/optd-sqlplannertest/tests/tpch/q9.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q9.planner.sql @@ -79,11 +79,11 @@ LogicalSort │ │ ├── #12 │ │ └── #46 │ └── Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalJoin { join_type: Inner, cond: true } + │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ ├── LogicalJoin { join_type: Inner, cond: true } + │ │ │ ├── LogicalJoin { join_type: Inner, cond: true } │ │ │ │ ├── LogicalScan { table: part } │ │ │ │ └── LogicalScan { table: supplier } │ │ │ └── LogicalScan { table: lineitem } @@ -118,7 +118,7 @@ PhysicalSort ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #16 ], right_keys: [ #0 ] } │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #18, #17 ], right_keys: [ #1, #0 ] } │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #9, #0 ], right_keys: [ #2, #1 ] } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true } │ │ │ │ ├── PhysicalFilter { cond: Like { expr: #1, pattern: "%green%", negated: false, case_insensitive: false } } │ │ │ │ │ └── PhysicalScan { table: part } │ │ │ │ └── PhysicalScan { table: supplier }