feat: printing q-errors nicely + explains (#122)
**Summary**: Now printing aggregate q-errors, per-query q-errors, and
EXPLAIN results to facilitate experimentation.

**Demo**:
![Screenshot 2024-03-21 at 13 34 13](https://github.com/cmu-db/optd/assets/20631215/795f42f8-42a6-45f2-bd5e-c44fd6b8cff3)
![Screenshot 2024-03-21 at 13 28 24](https://github.com/cmu-db/optd/assets/20631215/f9b1be74-1e48-4878-a4f9-26c7f68dd49c)

**Details**:
* Automatically prints the `EXPLAIN` results of all queries from all DBMSs at the `info` log level. Use `RUST_LOG=info` to see them, and use `--query-ids X` to investigate just one specific query.
* Now handles the edge case where the file indicating that a DB was created exists but the DB doesn't actually exist.
* Gracefully handles the case of infinite q-errors (a sketch of one way to do this follows this list).
* Deleted `dev_scripts`; the scripts now live in [gungnir-experiments](https://github.com/wangpatrick57/gungnir-experiments).
* Uses `prettytable-rs` for printing.
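
The gist of the infinite-q-error handling, as a minimal standalone sketch (illustrative only, not the exact code in this commit; the function signature and the `usize` cardinality type are assumptions):

```rust
/// Q-error (Leis 2015): max(est/true, true/est), always >= 1 for valid inputs.
/// Dividing by zero on f64 yields +inf rather than panicking, so a zero true
/// (or estimated) cardinality surfaces as an infinite q-error that the summary
/// table can count separately instead of crashing the run.
fn calc_qerror(estcard: usize, truecard: usize) -> f64 {
    let est = estcard as f64;
    let truth = truecard as f64;
    f64::max(est / truth, truth / est)
}

fn main() {
    assert_eq!(calc_qerror(10, 10), 1.0);
    assert_eq!(calc_qerror(100, 10), 10.0);
    assert!(calc_qerror(5, 0).is_infinite()); // graceful: no panic, just +inf
}
```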
wangpatrick57 authored Mar 21, 2024
1 parent cdef513 commit 368bcd6
Showing 11 changed files with 448 additions and 141 deletions.
320 changes: 280 additions & 40 deletions Cargo.lock

Large diffs are not rendered by default.

22 changes: 0 additions & 22 deletions dev_scripts/kill_postgres.sh

This file was deleted.

15 changes: 0 additions & 15 deletions dev_scripts/start_postgres.sh

This file was deleted.

2 changes: 2 additions & 0 deletions optd-perftest/Cargo.toml
@@ -39,6 +39,8 @@ env_logger = "0.11"
lazy_static = "1.4.0"
tokio-util = "0.7"
futures-util = "0.3"
+statistical = "1.0"
+prettytable-rs = "0.10"

[dev_dependencies]
assert_cmd = "2.0"
54 changes: 27 additions & 27 deletions optd-perftest/src/cardtest.rs
@@ -7,21 +7,21 @@ use crate::{benchmark::Benchmark, datafusion_db::DatafusionDb, tpch::TpchConfig}
use anyhow::{self};
use async_trait::async_trait;

-/// This struct performs cardinality testing across one or more databases.
-/// Another design would be for the CardtestRunnerDBHelper trait to expose a function
+/// This struct performs cardinality testing across one or more dbmss.
+/// Another design would be for the CardtestRunnerDBMSHelper trait to expose a function
/// to evaluate the Q-error. However, I chose not to do this design for reasons
-/// described in the comments of the CardtestRunnerDBHelper trait. This is why
-/// you would use CardtestRunner even for computing the Q-error of a single database.
+/// described in the comments of the CardtestRunnerDBMSHelper trait. This is why
+/// you would use CardtestRunner even for computing the Q-error of a single dbms.
pub struct CardtestRunner {
-pub databases: Vec<Box<dyn CardtestRunnerDBHelper>>,
+pub dbmss: Vec<Box<dyn CardtestRunnerDBMSHelper>>,
}

impl CardtestRunner {
-pub async fn new(databases: Vec<Box<dyn CardtestRunnerDBHelper>>) -> anyhow::Result<Self> {
-Ok(CardtestRunner { databases })
+pub async fn new(dbmss: Vec<Box<dyn CardtestRunnerDBMSHelper>>) -> anyhow::Result<Self> {
+Ok(CardtestRunner { dbmss })
}

-/// Get the Q-error of a query using the cost models of all databases being tested
+/// Get the Q-error of a query using the cost models of all dbmss being tested
/// Q-error is defined in [Leis 2015](https://15721.courses.cs.cmu.edu/spring2024/papers/16-costmodels/p204-leis.pdf)
/// One detail not specified in the paper is that Q-error is based on the ratio of true and estimated cardinality
/// of the entire query, not of a subtree of the query. This detail is specified in Section 7.1 of
@@ -32,16 +32,16 @@ impl CardtestRunner {
) -> anyhow::Result<HashMap<String, Vec<f64>>> {
let mut qerrors_alldbs = HashMap::new();

-for database in &mut self.databases {
-let estcards = database.eval_benchmark_estcards(benchmark).await?;
-let truecards = database.eval_benchmark_truecards(benchmark).await?;
+for dbms in &mut self.dbmss {
+let estcards = dbms.eval_benchmark_estcards(benchmark).await?;
+let truecards = dbms.eval_benchmark_truecards(benchmark).await?;
assert!(truecards.len() == estcards.len());
let qerrors = estcards
.into_iter()
.zip(truecards.into_iter())
.map(|(estcard, truecard)| CardtestRunner::calc_qerror(estcard, truecard))
.collect();
-qerrors_alldbs.insert(String::from(database.get_name()), qerrors);
+qerrors_alldbs.insert(String::from(dbms.get_name()), qerrors);
}

Ok(qerrors_alldbs)
@@ -55,27 +55,27 @@ impl CardtestRunner {
}
}

-/// This trait defines helper functions to enable cardinality testing on a database
+/// This trait defines helper functions to enable cardinality testing on a dbms
/// The reason a "get qerror" function is not exposed is to allow for greater
-/// flexibility. If we exposed "get qerror" for each database, we would need to
-/// get the true and estimated cardinalities for _each_ database. However, we
-/// can now choose to only get the true cardinalities of _one_ database to
+/// flexibility. If we exposed "get qerror" for each dbms, we would need to
+/// get the true and estimated cardinalities for _each_ dbms. However, we
+/// can now choose to only get the true cardinalities of _one_ dbms to
/// improve performance or even cache the true cardinalities. Additionally, if
-/// we do want to get the true cardinalities of all databases, we can compare
+/// we do want to get the true cardinalities of all dbmss, we can compare
/// them against each other to ensure they're all equal. All these options are
/// possible when exposing "get true card" and "get est card" instead of a
/// single "get qerror". If you want to compute the Q-error of a single
-/// database, just create a CardtestRunner with a single database as input.
+/// dbms, just create a CardtestRunner with a single dbms as input.
/// When exposing a "get true card" and "get est card" interface, you could
/// ostensibly do it on the granularity of a single SQL string or on the
/// granularity of an entire benchmark. I chose the latter for a simple reason:
-/// different databases might have different SQL strings for the same conceptual
-/// query (see how qgen in tpch-kit takes in database as an input).
+/// different dbmss might have different SQL strings for the same conceptual
+/// query (see how qgen in tpch-kit takes in dbms as an input).
/// When more performance tests are implemented, you would probably want to extract
-/// get_name() into a generic "Database" trait.
+/// get_name() into a generic "DBMS" trait.
#[async_trait]
-pub trait CardtestRunnerDBHelper {
-// get_name() has &self so that we're able to do Box<dyn CardtestRunnerDBHelper>
+pub trait CardtestRunnerDBMSHelper {
+// get_name() has &self so that we're able to do Box<dyn CardtestRunnerDBMSHelper>
fn get_name(&self) -> &str;

// The order of queries has to be the same between these two functions.
@@ -97,12 +97,12 @@ pub async fn cardtest<P: AsRef<Path> + Clone>(
) -> anyhow::Result<HashMap<String, Vec<f64>>> {
let pg_db = PostgresDb::new(workspace_dpath.clone(), pguser, pgpassword);
let df_db = DatafusionDb::new(workspace_dpath).await?;
-let databases: Vec<Box<dyn CardtestRunnerDBHelper>> = vec![Box::new(pg_db), Box::new(df_db)];
+let dbmss: Vec<Box<dyn CardtestRunnerDBMSHelper>> = vec![Box::new(pg_db), Box::new(df_db)];

let tpch_benchmark = Benchmark::Tpch(tpch_config.clone());
-let mut cardtest_runner = CardtestRunner::new(databases).await?;
-let qerrors = cardtest_runner
+let mut cardtest_runner = CardtestRunner::new(dbmss).await?;
+let qerrors_alldbs = cardtest_runner
.eval_benchmark_qerrors_alldbs(&tpch_benchmark)
.await?;
-Ok(qerrors)
+Ok(qerrors_alldbs)
}
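
To make the design rationale above concrete: because the trait exposes "get est card" and "get true card" separately, a caller can compute the (DBMS-independent) true cardinalities once and reuse them. Below is a hypothetical sketch of such a caching wrapper; it is not part of this commit, and the async method signatures and the `Benchmark` stand-in are assumptions made so the example is self-contained.

```rust
use async_trait::async_trait;

// Stand-in for crate::benchmark::Benchmark (the real type lives in benchmark.rs).
pub struct Benchmark;

// The trait from the diff above, with assumed method signatures.
#[async_trait]
pub trait CardtestRunnerDBMSHelper {
    fn get_name(&self) -> &str;
    async fn eval_benchmark_estcards(&mut self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>>;
    async fn eval_benchmark_truecards(&mut self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>>;
}

// Hypothetical wrapper: true cardinalities don't depend on a DBMS's cost model,
// so they can be fetched once and replayed on later calls.
pub struct CachedTruecards {
    inner: Box<dyn CardtestRunnerDBMSHelper + Send>,
    cached_truecards: Option<Vec<usize>>,
}

#[async_trait]
impl CardtestRunnerDBMSHelper for CachedTruecards {
    fn get_name(&self) -> &str {
        self.inner.get_name()
    }

    async fn eval_benchmark_estcards(&mut self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>> {
        self.inner.eval_benchmark_estcards(benchmark).await
    }

    async fn eval_benchmark_truecards(&mut self, benchmark: &Benchmark) -> anyhow::Result<Vec<usize>> {
        if self.cached_truecards.is_none() {
            self.cached_truecards = Some(self.inner.eval_benchmark_truecards(benchmark).await?);
        }
        Ok(self.cached_truecards.clone().unwrap())
    }
}
```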
15 changes: 13 additions & 2 deletions optd-perftest/src/datafusion_db.rs
@@ -6,7 +6,7 @@ use std::{

use crate::{
benchmark::Benchmark,
-cardtest::CardtestRunnerDBHelper,
+cardtest::CardtestRunnerDBMSHelper,
tpch::{TpchConfig, TpchKit},
};
use async_trait::async_trait;
@@ -35,7 +35,7 @@ pub struct DatafusionDb {
}

#[async_trait]
-impl CardtestRunnerDBHelper for DatafusionDb {
+impl CardtestRunnerDBMSHelper for DatafusionDb {
fn get_name(&self) -> &str {
"DataFusion"
}
@@ -170,11 +170,22 @@ impl DatafusionDb {
Ok(num_rows)
}

+fn log_explain(&self, explains: &[Vec<String>]) {
+// row_cnt is exclusively in physical_plan after optd
+let physical_plan_after_optd_lines = explains
+.iter()
+.find(|explain| explain.first().unwrap() == "physical_plan after optd")
+.unwrap();
+let explain_str = physical_plan_after_optd_lines.join("\n");
+log::info!("{} {}", self.get_name(), explain_str);
+}
+
async fn eval_query_estcard(&self, sql: &str) -> anyhow::Result<usize> {
lazy_static! {
static ref ROW_CNT_RE: Regex = Regex::new(r"row_cnt=(\d+\.\d+)").unwrap();
}
let explains = Self::execute(&self.ctx, &format!("explain verbose {}", sql)).await?;
+self.log_explain(&explains);
// Find first occurrence of row_cnt=... in the output.
let row_cnt = explains
.iter()
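Since the estimated cardinality is scraped out of DataFusion's `EXPLAIN VERBOSE` text, here is a self-contained sketch of that extraction step; only the `row_cnt=` pattern comes from the diff above, and the plan line in `main` is a made-up example of the expected shape:

```rust
use regex::Regex;

// Scan the explain output for the first `row_cnt=<float>` annotation and treat
// it as the plan's estimated output cardinality.
fn parse_estcard(explain_lines: &[String]) -> Option<usize> {
    let row_cnt_re = Regex::new(r"row_cnt=(\d+\.\d+)").unwrap();
    explain_lines.iter().find_map(|line| {
        row_cnt_re
            .captures(line)
            .and_then(|caps| caps.get(1))
            .and_then(|m| m.as_str().parse::<f64>().ok())
            .map(|row_cnt| row_cnt as usize)
    })
}

fn main() {
    // Hypothetical plan line; the real EXPLAIN text will differ.
    let lines = vec!["PhysicalScan { table: lineitem, row_cnt=6001215.0 }".to_string()];
    assert_eq!(parse_estcard(&lines), Some(6001215));
}
```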
75 changes: 68 additions & 7 deletions optd-perftest/src/main.rs
@@ -1,9 +1,9 @@
-use clap::{Parser, Subcommand};
use optd_perftest::cardtest;
use optd_perftest::shell;
use optd_perftest::tpch::{TpchConfig, TPCH_KIT_POSTGRES};
-use std::fs;

+use clap::{Parser, Subcommand};
+use prettytable::{format, Cell, Row, Table};
+use std::{fs, iter};

#[derive(Parser)]
struct Cli {
@@ -66,14 +66,75 @@ async fn main() -> anyhow::Result<()> {
pgpassword,
} => {
let tpch_config = TpchConfig {
-database: String::from(TPCH_KIT_POSTGRES),
+dbms: String::from(TPCH_KIT_POSTGRES),
scale_factor,
seed,
-query_ids,
+query_ids: query_ids.clone(),
};
-let qerrors =
+let qerrors_alldbs =
cardtest::cardtest(&workspace_dpath, &pguser, &pgpassword, tpch_config).await?;
println!("qerrors={:?}", qerrors);
println!(" Aggregate Q-Error Comparison");
let mut agg_qerror_table = Table::new();
agg_qerror_table.set_titles(prettytable::row![
"DBMS",
"Median",
"# Infinite",
"Mean",
"Min",
"Max"
]);
for (dbms, qerrors) in &qerrors_alldbs {
if !qerrors.is_empty() {
let finite_qerrors: Vec<f64> = qerrors
.clone()
.into_iter()
.filter(|&qerror| qerror.is_finite())
.collect();
let ninf_qerrors = qerrors.len() - finite_qerrors.len();
let mean_qerror =
finite_qerrors.iter().sum::<f64>() / finite_qerrors.len() as f64;
let min_qerror = finite_qerrors
.iter()
.min_by(|a, b| a.partial_cmp(b).unwrap())
.unwrap();
let median_qerror = statistical::median(qerrors);
let max_qerror = finite_qerrors
.iter()
.max_by(|a, b| a.partial_cmp(b).unwrap())
.unwrap();
agg_qerror_table.add_row(prettytable::row![
dbms,
median_qerror,
ninf_qerrors,
mean_qerror,
min_qerror,
max_qerror
]);
} else {
agg_qerror_table
.add_row(prettytable::row![dbms, "N/A", "N/A", "N/A", "N/A", "N/A"]);
}
}
agg_qerror_table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
agg_qerror_table.printstd();

let mut per_query_qerror_table = Table::new();
println!(" Per-Query Q-Error Comparison");
let title_cells = iter::once(Cell::new("Query #"))
.chain(qerrors_alldbs.keys().map(|dbms| Cell::new(dbms)))
.collect();
per_query_qerror_table.set_titles(Row::new(title_cells));
for (i, query_id) in query_ids.iter().enumerate() {
let mut row_cells = vec![];
row_cells.push(prettytable::cell!(query_id));
for qerrors in qerrors_alldbs.values() {
let qerror = qerrors.get(i).unwrap();
row_cells.push(prettytable::cell!(qerror));
}
per_query_qerror_table.add_row(Row::new(row_cells));
}
per_query_qerror_table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
per_query_qerror_table.printstd();
}
}

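For readers who haven't used `prettytable-rs`, a minimal standalone sketch of the API pattern used in `main.rs` above; the DBMS names and numbers are made up:

```rust
use prettytable::{format, Table};

fn main() {
    let mut table = Table::new();
    table.set_titles(prettytable::row!["DBMS", "Median", "# Infinite"]);
    table.add_row(prettytable::row!["Postgres", 1.5, 0]);
    table.add_row(prettytable::row!["DataFusion", "N/A", "N/A"]);
    // Same format as above: separator under the title row, none between data rows.
    table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
    table.printstd();
}
```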