From ef9e65a1700d5f30f98273862cf69528e8812fb9 Mon Sep 17 00:00:00 2001
From: David Koloski
Date: Thu, 6 Jul 2023 19:48:47 -0400
Subject: [PATCH] Rewrite automation in Rust

---
 .github/workflows/bench.yml          |   5 +-
 .gitignore                           |   2 -
 Cargo.toml                           |  35 ++-
 build.rs                             |   6 +-
 {tools => prebuilt}/capnp.exe        | Bin
 {tools => prebuilt}/capnpc-c++.exe   | Bin
 {tools => prebuilt}/capnpc-capnp.exe | Bin
 {tools => prebuilt}/flatc.exe        | Bin
 {tools => prebuilt}/protoc.exe       | Bin
 .../README.md.template               |   6 +-
 tools/bencher/Cargo.toml             |  11 +
 tools/bencher/src/main.rs            |  91 ++++++
 tools/config.json                    |   9 +
 tools/formatter/Cargo.toml           |  13 +
 tools/formatter/src/main.rs          | 243 ++++++++++++++++
 tools/parser/Cargo.toml              |  15 +
 tools/parser/src/main.rs             |  87 ++++++
 tools/schema/Cargo.toml              |  10 +
 tools/schema/src/lib.rs              |  65 +++++
 update_benchmark.mjs                 | 265 ------------------
 20 files changed, 586 insertions(+), 277 deletions(-)
 rename {tools => prebuilt}/capnp.exe (100%)
 rename {tools => prebuilt}/capnpc-c++.exe (100%)
 rename {tools => prebuilt}/capnpc-capnp.exe (100%)
 rename {tools => prebuilt}/flatc.exe (100%)
 rename {tools => prebuilt}/protoc.exe (100%)
 rename README.md.template => tools/README.md.template (96%)
 create mode 100644 tools/bencher/Cargo.toml
 create mode 100644 tools/bencher/src/main.rs
 create mode 100644 tools/config.json
 create mode 100644 tools/formatter/Cargo.toml
 create mode 100644 tools/formatter/src/main.rs
 create mode 100644 tools/parser/Cargo.toml
 create mode 100644 tools/parser/src/main.rs
 create mode 100644 tools/schema/Cargo.toml
 create mode 100644 tools/schema/src/lib.rs
 delete mode 100644 update_benchmark.mjs

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 0dc34bf..4fbc201 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -3,6 +3,9 @@ name: bench
 on:
   workflow_dispatch:
   push:
+    paths-ignore:
+      - 'README.md'
+      - 'benchmark_results/**'
 
 env:
   CI: true
@@ -47,7 +50,7 @@ jobs:
           git config --global user.name github-actions
          git config --global user.email github-actions@github.com
 
-          node update_benchmark.mjs
+          cargo run -p bencher
 
          git add -A benchmark_results
          git add README.md
diff --git a/.gitignore b/.gitignore
index 329a6a3..45dfe28 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,3 @@
-*.log
-.date
 Cargo.lock
 /target
 /.vscode
diff --git a/Cargo.toml b/Cargo.toml
index a204085..fdc5998 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,36 @@
-[package]
+[workspace]
+members = [
+    "tools/bencher",
+    "tools/formatter",
+    "tools/parser",
+    "tools/schema",
+]
+
+[workspace.package]
+version = "0.1.0"
 authors = ["David Koloski "]
-build = "build.rs"
 edition = "2021"
+license = "MIT"
+publish = false
+
+[workspace.dependencies]
+cargo_metadata = "0.15"
+clap = { version = "4", features = ["derive"] }
+regex = "1.9"
+schema = { path = "tools/schema" }
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+tempfile = "3.6"
+time = "0.3"
+
+[package]
 name = "rust_serialization_benchmark"
-version = "0.1.1"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+build = "build.rs"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 # Some features may require multiple dependencies to compile properly
@@ -26,7 +53,7 @@ bytecheck = { version = "0.6.10", optional = true }
 bytemuck = { version = "1.13.1", optional = true }
 capnp = { version = "0.16.1", optional = true }
 ciborium = { version = "0.2.0", optional = true }
-criterion = "0.4.0"
+criterion = "0.5"
 dlhn = { version = "0.1.4", optional = true }
 flatbuffers = { version = "23.1.21", optional = true }
 libflate = "1.3.0"
diff --git a/build.rs b/build.rs
index ec7ea8a..240463d 100644
--- a/build.rs
+++ b/build.rs
@@ -18,7 +18,7 @@ fn bebop_compile_dataset(name: &'static str) {
 fn capnpc_compile_dataset(name: &'static str) -> capnp::Result<()> {
     let mut command = capnpc::CompilerCommand::new();
     #[cfg(windows)]
-    command.capnp_executable("tools/capnp.exe");
+    command.capnp_executable("prebuilt/capnp.exe");
     command.file(&format!("src/datasets/{0}/{0}.capnp", name));
     command.output_path(".");
     command.default_parent_module(vec!["datasets".into(), name.into()]);
@@ -27,7 +27,7 @@
 
 fn flatc_compile_dataset(name: &'static str) -> flatc_rust::Result<()> {
     #[cfg(windows)]
-    let flatc = flatc_rust::Flatc::from_path("./tools/flatc.exe");
+    let flatc = flatc_rust::Flatc::from_path("./prebuilt/flatc.exe");
     #[cfg(not(windows))]
     let flatc = flatc_rust::Flatc::from_env_path();
 
@@ -45,7 +45,7 @@ fn prost_compile_dataset(name: &'static str) -> std::io::Result<()> {
     if cfg!(windows) {
         match env::var("PROTOC") {
             Err(_) => {
-                env::set_var("PROTOC", "./tools/protoc.exe");
+                env::set_var("PROTOC", "./prebuilt/protoc.exe");
            }
            _ => {}
        }
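Note on the prost hunk above: the build script keeps the original `match env::var("PROTOC")` fallback and only updates the path to `prebuilt/`. If that block is ever revisited, an equivalent form using `env::var_os` reads a little more directly. This is a sketch for comparison only, not part of the patch:

```rust
use std::env;

// Sketch: fall back to the bundled protoc.exe only when the user has not
// already pointed PROTOC somewhere else (same behavior as the match above).
fn set_protoc_fallback() {
    if cfg!(windows) && env::var_os("PROTOC").is_none() {
        // std::env::set_var is safe to call on edition 2021, which this
        // workspace uses.
        env::set_var("PROTOC", "./prebuilt/protoc.exe");
    }
}
```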
diff --git a/tools/capnp.exe b/prebuilt/capnp.exe
similarity index 100%
rename from tools/capnp.exe
rename to prebuilt/capnp.exe
diff --git a/tools/capnpc-c++.exe b/prebuilt/capnpc-c++.exe
similarity index 100%
rename from tools/capnpc-c++.exe
rename to prebuilt/capnpc-c++.exe
diff --git a/tools/capnpc-capnp.exe b/prebuilt/capnpc-capnp.exe
similarity index 100%
rename from tools/capnpc-capnp.exe
rename to prebuilt/capnpc-capnp.exe
diff --git a/tools/flatc.exe b/prebuilt/flatc.exe
similarity index 100%
rename from tools/flatc.exe
rename to prebuilt/flatc.exe
diff --git a/tools/protoc.exe b/prebuilt/protoc.exe
similarity index 100%
rename from tools/protoc.exe
rename to prebuilt/protoc.exe
diff --git a/README.md.template b/tools/README.md.template
similarity index 96%
rename from README.md.template
rename to tools/README.md.template
index c07f9a6..dc44691 100644
--- a/README.md.template
+++ b/tools/README.md.template
@@ -1,3 +1,5 @@
+{dne}
+
 # Rust serialization benchmark
 
 The goal of these benchmarks is to provide thorough and complete benchmarks for various rust
@@ -24,9 +26,9 @@ Zero-copy deserialization libraries have an additional set of benchmarks:
 Some benchmark results may be italicized and followed by an asterisk. Mouse over these for more
 details on what situation was benchmarked. Other footnotes are located at the bottom.
 
-## Last updated: ${date}
+## Last updated: {date}
 
-${results}
+{tables}{links}
 
 ## Footnotes:
 
diff --git a/tools/bencher/Cargo.toml b/tools/bencher/Cargo.toml
new file mode 100644
index 0000000..021ac40
--- /dev/null
+++ b/tools/bencher/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "bencher"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+
+[dependencies]
+tempfile.workspace = true
+time.workspace = true
diff --git a/tools/bencher/src/main.rs b/tools/bencher/src/main.rs
new file mode 100644
index 0000000..b619dfc
--- /dev/null
+++ b/tools/bencher/src/main.rs
@@ -0,0 +1,91 @@
+use std::{fs, path::PathBuf, process::Command};
+
+use tempfile::NamedTempFile;
+use time::OffsetDateTime;
+
+fn main() {
+    let now = OffsetDateTime::now_utc();
+
+    let metadata_path = NamedTempFile::new().unwrap().into_temp_path();
+    let metadata = Command::new("cargo")
+        .args(["metadata"])
+        .output()
+        .unwrap()
+        .stdout;
+    fs::write(&metadata_path, metadata).unwrap();
+
+    let mut bench_path = PathBuf::from("benchmark_results");
+    bench_path.push(&format!(
+        "{}-{}-{}_{}-{}-{}",
+        now.year(),
+        now.month() as usize,
+        now.day(),
+        now.hour(),
+        now.minute(),
+        now.second(),
+    ));
+
+    let mut log_path = bench_path.clone();
+    log_path.set_extension("log");
+    let log = Command::new("cargo")
+        .args(["bench"])
+        .output()
+        .unwrap()
+        .stdout;
+    fs::write(&log_path, log).unwrap();
+
+    let mut json_path = bench_path.clone();
+    json_path.set_extension("json");
+    Command::new("cargo")
+        .args([
+            "run",
+            "-p",
+            "parser",
+            "--",
+            "--log",
+        ])
+        .arg(&log_path)
+        .arg("--meta")
+        .arg(&metadata_path)
+        .arg("--output")
+        .arg(&json_path)
+        .status()
+        .unwrap();
+
+    let mut config_path = PathBuf::from("tools");
+    config_path.push("config.json");
+
+    let mut template_path = PathBuf::from("tools");
+    template_path.push("README.md.template");
+
+    Command::new("cargo")
+        .args([
+            "run",
+            "-p",
+            "formatter",
+            "--",
+        ])
+        .arg(&json_path)
+        .arg("--config")
+        .arg(&config_path)
+        .arg("--template")
+        .arg(&template_path)
+        .args([
+            "--date",
+            &format!(
+                "{}-{}-{} {}:{}:{}",
+                now.year(),
+                now.month() as usize,
+                now.day(),
+                now.hour(),
+                now.minute(),
+                now.second(),
+            ),
+            "--output",
+            "README.md",
+        ])
+        .status()
+        .unwrap();
+
+    metadata_path.close().unwrap();
+}
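The bencher above names its output files with unpadded date fields (for example `2023-7-6_19-48-47.log`), matching the old script's date handling. If lexicographically sortable names under `benchmark_results/` are ever wanted, zero-padding is a small change. A hypothetical helper, not part of the patch:

```rust
use time::OffsetDateTime;

// Hypothetical helper: zero-pad every field so file names sort by date.
fn timestamp(now: OffsetDateTime) -> String {
    format!(
        "{:04}-{:02}-{:02}_{:02}-{:02}-{:02}",
        now.year(),
        now.month() as u8,
        now.day(),
        now.hour(),
        now.minute(),
        now.second(),
    )
}
```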
diff --git a/tools/config.json b/tools/config.json
new file mode 100644
index 0000000..c7d280a
--- /dev/null
+++ b/tools/config.json
@@ -0,0 +1,9 @@
+{
+    "descriptions": {
+        "log": "This data set is composed of HTTP request logs that are small and contain many strings.",
+        "mesh": "This data set is a single mesh. The mesh contains an array of triangles, each of which has three vertices and a normal vector.",
+        "mk48": "This data set is composed of mk48.io game updates that contain data with many exploitable patterns and invariants.",
+        "minecraft_savedata": "This data set is composed of Minecraft player saves that contain highly structured data."
+    },
+    "do_not_edit": "\n"
+}
diff --git a/tools/formatter/Cargo.toml b/tools/formatter/Cargo.toml
new file mode 100644
index 0000000..82ad78b
--- /dev/null
+++ b/tools/formatter/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "formatter"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+
+[dependencies]
+clap.workspace = true
+schema.workspace = true
+serde.workspace = true
+serde_json.workspace = true
diff --git a/tools/formatter/src/main.rs b/tools/formatter/src/main.rs
new file mode 100644
index 0000000..a39d1a8
--- /dev/null
+++ b/tools/formatter/src/main.rs
@@ -0,0 +1,243 @@
+use std::{fs, path::PathBuf, fmt::{Write, Display, self}, collections::HashMap};
+
+use clap::Parser;
+
+use schema::{Results, Dataset, Meta, Bench, Values};
+use serde::{Deserialize, Serialize};
+
+#[derive(Parser, Debug)]
+#[command(name = "formatter")]
+#[command(about = "Formats the README.md template using the parsed data from a benchmarking run")]
+struct Args {
+    input: PathBuf,
+    #[arg(short, long)]
+    config: PathBuf,
+    #[arg(short, long)]
+    template: PathBuf,
+    #[arg(short, long)]
+    date: String,
+    #[arg(short, long)]
+    output: PathBuf,
+}
+
+#[derive(Deserialize, Serialize)]
+struct Config {
+    descriptions: HashMap<String, String>,
+    do_not_edit: String,
+}
+
+fn main() {
+    let args = Args::parse();
+
+    let results = serde_json::from_str::<Results>(&fs::read_to_string(args.input).unwrap()).unwrap();
+    let config = serde_json::from_str::<Config>(&fs::read_to_string(args.config).unwrap()).unwrap();
+    let template = fs::read_to_string(args.template).unwrap();
+
+    fs::write(args.output, format(&results, &config, &template, &args.date).unwrap()).unwrap();
+}
+
+struct Tables {
+    header: String,
+    data: String,
+    comparison: String,
+}
+
+struct Nanos(f64);
+
+impl Display for Nanos {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let (value, suffix) = if self.0 > 1_000_000.0 {
+            (self.0 / 1_000_000.0, "ms")
+        } else if self.0 > 1_000.0 {
+            (self.0 / 1_000.0, "µs")
+        } else {
+            (self.0, "ns")
+        };
+        write!(f, "{value:.*} {suffix}", 4 - value.log10().floor() as usize)
+    }
+}
+
+struct Bytes(u64);
+
+impl Display for Bytes {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+struct Percent(f64, f64);
+
+impl Display for Percent {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:.2}%", self.0 / self.1 * 100.0)
+    }
+}
+
+fn format_values<T: Copy, U: Display>(
+    values: &Values<T>,
+    output: &mut String,
+    display: impl Fn(T) -> U,
+) -> fmt::Result {
+    if let Some(value) = values.primary {
+        write!(output, " {}", display(value))?;
+    }
+
+    for (_name, value) in values.variants.iter() {
+        write!(output, " *{}\\**", display(*value))?;
+    }
+    write!(output, " |")?;
+    Ok(())
+}
+
+fn write_crate_row(
+    output: &mut String,
+    crate_name: &str,
+    meta: &Meta,
+) -> fmt::Result {
+    write!(output, "| [{crate_name} {}][{crate_name}] |", meta.crate_versions[crate_name])
+}
+
+pub fn capitalize(s: &str) -> String {
+    let mut c = s.chars();
+    match c.next() {
+        None => String::new(),
+        Some(f) => f.to_uppercase().collect::<String>() + c.as_str(),
+    }
+}
+
+fn build_tables(
+    dataset: &Dataset,
+    columns: &[&str],
+    meta: &Meta,
+    placeholder: &str,
+) -> Result<Tables, fmt::Error> {
+    let mut header = "| Crate |".to_string();
+    for column in columns {
+        write!(&mut header, " {} |", capitalize(column))?;
+    }
+    write!(&mut header, "\n|---|")?;
+    for _ in columns {
+        write!(&mut header, "--:|")?;
+    }
+
+    let mut data = String::new();
+    let mut comparison = String::new();
+
+    let mins = columns.iter().cloned().map(|col| {
+        dataset.crates.values().filter_map(|crate_| crate_.benches.get(col)).map(|bench| {
+            match bench {
+                Bench::Nanos(values) => values.iter().cloned().reduce(f64::min).unwrap(),
+                Bench::Bytes(values) => values.iter().cloned().min().unwrap() as f64,
+            }
+        }).reduce(f64::min).unwrap()
+    }).collect::<Vec<_>>();
+
+    for (crate_name, crate_) in dataset.crates.iter() {
+        if !columns.iter().all(|&c| crate_.benches.get(c).is_none()) {
+            write_crate_row(&mut data, crate_name, meta)?;
+            write_crate_row(&mut comparison, crate_name, meta)?;
+
+            for (&column, &min) in columns.iter().zip(mins.iter()) {
+                if let Some(bench) = crate_.benches.get(column) {
+                    match bench {
+                        Bench::Nanos(values) => {
+                            format_values(values, &mut data, Nanos)?;
+                            format_values(values, &mut comparison, |x| Percent(min, x))?;
+                        }
+                        Bench::Bytes(values) => {
+                            format_values(values, &mut data, Bytes)?;
+                            format_values(values, &mut comparison, |x| Percent(min, x as f64))?;
+                        }
+                    }
+                } else {
+                    write!(&mut data, " {placeholder} |")?;
+                    write!(&mut comparison, " {placeholder} |")?;
+                }
+            }
+            write!(&mut data, "\n")?;
+            write!(&mut comparison, "\n")?;
+        }
+    }
+
+    Ok(Tables {
+        header,
+        data,
+        comparison,
+    })
+}
+
+fn format(
+    results: &Results,
+    config: &Config,
+    template: &str,
+    date: &str,
+) -> Result<String, fmt::Error> {
+    const SERDE_COLS: &[&str] = &["serialize", "deserialize", "size", "zlib", "zstd"];
+    const ZCD_COLS: &[&str] = &["access", "read", "update"];
+
+    let mut tables = String::new();
+
+    for (dataset_name, dataset) in results.datasets.iter() {
+        let serde_tables = build_tables(dataset, SERDE_COLS, &results.meta, "†")?;
+        let zcd_tables = build_tables(dataset, ZCD_COLS, &results.meta, "‡")?;
+
+        write!(
+            &mut tables,
+            "\
+            ## `{dataset_name}`\n\
+            \n\
+            {}\n\
+            \n\
+            ### Raw data\n\
+            \n\
+            For operations, time per iteration; for size, bytes. Lower is better.\n\
+            \n\
+            #### Serialize / deserialize speed and size\n\
+            \n\
+            {}\n\
+            {}\n\
+            #### Zero-copy deserialization speed\n\
+            \n\
+            {}\n\
+            {}\n\
+            ### Comparison\n\
+            \n\
+            Relative to best. Higher is better.\n\
+            \n\
+            #### Serialize / deserialize speed and size\n\
+            \n\
+            {}\n\
+            {}\n\
+            #### Zero-copy deserialization speed\n\
+            \n\
+            {}\n\
+            {}\n\
+            ",
+            config.descriptions.get(dataset_name).map(|desc| desc.as_str()).unwrap_or("Missing dataset description"),
+            serde_tables.header,
+            serde_tables.data,
+            zcd_tables.header,
+            zcd_tables.data,
+            serde_tables.header,
+            serde_tables.comparison,
+            zcd_tables.header,
+            zcd_tables.comparison,
+        )?;
+    }
+
+    let mut links = String::new();
+    for (crate_name, version) in results.meta.crate_versions.iter() {
+        write!(
+            &mut links,
+            "[{crate_name}]: https://crates.io/crates/{crate_name}/{version}\n",
+        )?;
+    }
+
+    Ok(
+        template
+            .replace("{dne}", &config.do_not_edit)
+            .replace("{date}", date)
+            .replace("{tables}", &tables)
+            .replace("{links}", &links)
+    )
+}
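As a quick illustration of the two `Display` helpers above: `Nanos` picks a unit and keeps five significant figures, and `Percent(min, value)` reports how close a value is to the best result in its column. A test-style sketch that could live inside `tools/formatter/src/main.rs` (assumed placement, not part of the patch):

```rust
#[cfg(test)]
mod display_tests {
    use super::{Nanos, Percent};

    #[test]
    fn scales_and_compares() {
        // Nanos scales into ns/µs/ms and keeps five significant figures.
        assert_eq!(Nanos(345.6).to_string(), "345.60 ns");
        assert_eq!(Nanos(12_500.0).to_string(), "12.500 µs");
        assert_eq!(Nanos(2_500_000.0).to_string(), "2.5000 ms");

        // Percent(min, value): the best entry in a column reads 100.00%.
        assert_eq!(Percent(250.0, 250.0).to_string(), "100.00%");
        assert_eq!(Percent(250.0, 1_000.0).to_string(), "25.00%");
    }
}
```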
diff --git a/tools/parser/Cargo.toml b/tools/parser/Cargo.toml
new file mode 100644
index 0000000..61e9ef3
--- /dev/null
+++ b/tools/parser/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "parser"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+
+[dependencies]
+cargo_metadata.workspace = true
+schema.workspace = true
+clap.workspace = true
+regex.workspace = true
+serde.workspace = true
+serde_json.workspace = true
diff --git a/tools/parser/src/main.rs b/tools/parser/src/main.rs
new file mode 100644
index 0000000..ce98538
--- /dev/null
+++ b/tools/parser/src/main.rs
@@ -0,0 +1,87 @@
+use std::{path::PathBuf, fs};
+use cargo_metadata::Metadata;
+use clap::Parser;
+use regex::Regex;
+
+use schema::{Dataset, Crate, Bench, Results, Meta};
+
+#[derive(Parser, Debug)]
+#[command(name = "parser")]
+#[command(about = "Parses benchmark logs from rust_serialization_benchmark into JSON", long_about = None)]
+struct Args {
+    #[arg(long)]
+    log: PathBuf,
+    #[arg(long)]
+    meta: PathBuf,
+    #[arg(short, long)]
+    output: PathBuf,
+}
+
+fn parse_time(s: &str) -> f64 {
+    let (n, unit) = s.split_once(' ').unwrap();
+    let factor = match unit {
+        "ns" => 1.0,
+        "µs" => 1_000.0,
+        "ms" => 1_000_000.0,
+        _ => panic!("unrecognized unit of time"),
+    };
+    n.parse::<f64>().unwrap() * factor
+}
+
+fn main() {
+    let args = Args::parse();
+
+    let log = fs::read_to_string(&args.log).unwrap();
+    let metadata = serde_json::from_str::<Metadata>(&fs::read_to_string(args.meta).unwrap()).unwrap();
+
+    let time_benches_re = Regex::new(
+        r"(?m)^([a-z0-9_\-]+)\/([a-z_\-]+)\/([a-z\-]+)(?: \(([a-z \-+]*)\))?\s+time: \[\d+\.\d+ [µnm]s (\d+\.\d+ [µnm]s)"
+    ).unwrap();
+    let size_benches_re = Regex::new(
+        r"(?m)^([a-z0-9_\-]+)\/([a-z_\-]+)\/(size|zlib|zstd) (\d+)"
+    ).unwrap();
+
+    let mut results = Results::default();
+
+    for capture in time_benches_re.captures_iter(&log) {
+        let crate_name = &capture[2];
+
+        let dataset = results.datasets.entry(capture[1].to_string()).or_insert(Dataset::default());
+        let crate_ = dataset.crates.entry(crate_name.to_string()).or_insert(Crate::default());
+        let bench = crate_.benches.entry(capture[3].to_string()).or_insert(Bench::nanos());
+        let values = bench.unwrap_nanos();
+
+        let value = parse_time(&capture[5]);
+        if let Some(variant) = capture.get(4) {
+            values.variants.insert(variant.as_str().to_string(), value);
+        } else {
+            values.primary = Some(value);
+        }
+
+        register_crate_version(&mut results.meta, crate_name, &metadata);
+    }
+
+    for capture in size_benches_re.captures_iter(&log) {
+        let crate_name = &capture[2];
+
+        let dataset = results.datasets.entry(capture[1].to_string()).or_insert(Dataset::default());
+        let crate_ = dataset.crates.entry(crate_name.to_string()).or_insert(Crate::default());
+        let bench = crate_.benches.entry(capture[3].to_string()).or_insert(Bench::bytes());
+        let values = bench.unwrap_bytes();
+        values.primary = Some(capture[4].parse().unwrap());
+
+        register_crate_version(&mut results.meta, crate_name, &metadata);
+    }
+
+    fs::write(args.output, serde_json::to_string(&results).unwrap()).unwrap();
+}
+
+fn register_crate_version(meta: &mut Meta, crate_name: &str, metadata: &Metadata) {
+    if !meta.crate_versions.contains_key(crate_name) {
+        let version = metadata.packages.iter().find(|pkg| pkg.name == crate_name).unwrap().version.clone();
+        meta.crate_versions.insert(
+            crate_name.to_string(),
+            format!("{version}"),
+        );
+    }
+}
diff --git a/tools/schema/Cargo.toml b/tools/schema/Cargo.toml
new file mode 100644
index 0000000..03c9681
--- /dev/null
+++ b/tools/schema/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "schema"
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+
+[dependencies]
+serde.workspace = true
diff --git a/tools/schema/src/lib.rs b/tools/schema/src/lib.rs
new file mode 100644
index 0000000..5897728
--- /dev/null
+++ b/tools/schema/src/lib.rs
@@ -0,0 +1,65 @@
+use std::collections::{HashMap, BTreeMap};
+use serde::{Deserialize, Serialize};
+
+#[derive(Default, Deserialize, Serialize)]
+pub struct Results {
+    pub datasets: BTreeMap<String, Dataset>,
+    pub meta: Meta,
+}
+
+#[derive(Default, Deserialize, Serialize)]
+pub struct Meta {
+    pub crate_versions: BTreeMap<String, String>,
+}
+
+#[derive(Default, Deserialize, Serialize)]
+pub struct Dataset {
+    pub crates: BTreeMap<String, Crate>,
+}
+
+#[derive(Default, Deserialize, Serialize)]
+pub struct Crate {
+    pub benches: HashMap<String, Bench>,
+}
+
+#[derive(Deserialize, Serialize)]
+pub enum Bench {
+    Nanos(Values<f64>),
+    Bytes(Values<u64>),
+}
+
+impl Bench {
+    pub fn nanos() -> Bench {
+        Bench::Nanos(Values::default())
+    }
+
+    pub fn bytes() -> Bench {
+        Bench::Bytes(Values::default())
+    }
+
+    pub fn unwrap_nanos(&mut self) -> &mut Values<f64> {
+        match self {
+            Bench::Nanos(b) => b,
+            _ => panic!("expected nanos bench"),
+        }
+    }
+
+    pub fn unwrap_bytes(&mut self) -> &mut Values<u64> {
+        match self {
+            Bench::Bytes(b) => b,
+            _ => panic!("expected bytes bench"),
+        }
+    }
+}
+
+#[derive(Default, Deserialize, Serialize)]
+pub struct Values<T> {
+    pub primary: Option<T>,
+    pub variants: BTreeMap<String, T>,
+}
+
+impl<T> Values<T> {
+    pub fn iter(&self) -> impl Iterator<Item = &T> {
+        self.variants.values().chain(self.primary.as_ref())
+    }
+}
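Together, `parser` and `schema` replace the ad-hoc JS objects from the deleted script below with typed data that `formatter` can consume. A sketch of how one Criterion measurement ends up in the intermediate JSON (illustrative crate name and values; assumes `serde_json` is available, as it is in the parser crate):

```rust
use schema::{Bench, Results};

// Sketch: build the same shape the parser emits for a single measurement.
fn example_results() -> String {
    let mut results = Results::default();
    let dataset = results.datasets.entry("log".to_string()).or_default();
    let crate_ = dataset.crates.entry("rkyv".to_string()).or_default();
    let bench = crate_
        .benches
        .entry("serialize".to_string())
        .or_insert(Bench::nanos());
    bench.unwrap_nanos().primary = Some(1_024.5); // nanoseconds, illustrative
    results
        .meta
        .crate_versions
        .insert("rkyv".to_string(), "0.0.0".to_string()); // placeholder version
    serde_json::to_string_pretty(&results).unwrap()
}
```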
diff --git a/update_benchmark.mjs b/update_benchmark.mjs
deleted file mode 100644
index cfad0af..0000000
--- a/update_benchmark.mjs
+++ /dev/null
@@ -1,265 +0,0 @@
-import child_process from 'child_process';
-import fs from 'fs';
-
-function formatTemplate(str, args) {
-  for (const arg in args) {
-    str = str.replace(`\${${arg}}`, args[arg])
-  }
-  return str
-}
-
-function main() {
-  const now = new Date()
-  const date = `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}`
-
-  const metadata = JSON.parse(child_process.execSync(`cargo metadata`, {
-    maxBuffer: 4 * 1024 * 1024,
-  }).toString('utf-8'))
-
-  const results = child_process.execSync(`cargo bench`).toString('utf-8')
-  console.log(results)
-  const resultsPath = `benchmark_results/${date}.txt`
-  fs.writeFileSync(resultsPath, results)
-
-  const template = fs.readFileSync('README.md.template', 'utf-8')
-  const DO_NOT_EDIT = '\n\n\n'
-  fs.writeFileSync(
-    'README.md',
-    DO_NOT_EDIT + formatTemplate(template, { date, results: format(results, metadata) })
-  )
-}
-
-function parseUnit(unit) {
-  switch (unit) {
-    case 'ns':
-      return 1;
-    case 'µs':
-      return 1000;
-    case 'ms':
-      return 1000000;
-  }
-}
-
-function parseTime(time) {
-  return Number(time.substr(0, time.length - 3)) * parseUnit(time.substr(time.length - 2, 2))
-}
-
-function getMinValue(dataset, bench) {
-  let min = null
-  for (let crate in dataset) {
-    if (dataset[crate][bench] !== undefined) {
-      let benchResults = dataset[crate][bench]
-      if (benchResults.value != null && (min == null || benchResults.value < min)) {
-        min = benchResults.value
-      }
-
-      for (let variant in benchResults.variants) {
-        let variantResults = benchResults.variants[variant]
-        if (variantResults.value != null && (min == null || variantResults.value < min)) {
-          min = variantResults.value
-        }
-      }
-    }
-  }
-  return min
-}
-
-function buildTables(results, dataset, crates, columns, footnote) {
-  let header = '| Crate |'
-  for (let column of columns) {
-    header += ` ${column[0].toUpperCase() + column.substr(1)} |`
-  }
-  header += '\n|---|'
-  for (let column of columns) {
-    header += '--:|'
-  }
-
-  let dataTable = ''
-  for (let crate in results[dataset]) {
-    let hadResult = false
-    let row = `| [${crate} ${crates[crate]}][${crate}] |`
-    for (let column of columns) {
-      let output = results[dataset][crate][column]
-      if (output === undefined) {
-        row += ` ${footnote} |`
-      } else {
-        if (output.display != null) {
-          row += ` ${output.display}`
-          hadResult = true
-        }
-        for (let variant in output.variants) {
-          row += ` *${output.variants[variant].display}\\**`
-          hadResult = true
-        }
-        row += ' |'
-      }
-    }
-    if (hadResult) {
-      if (dataTable != '') {
-        dataTable += '\n'
-      }
-      dataTable += `${row}`
-    }
-  }
-
-  let comparisonTable = ''
-  for (let crate in results[dataset]) {
-    let hadResult = false
-    let row = `| [${crate} ${crates[crate]}][${crate}] |`
-    for (let column of columns) {
-      let min = getMinValue(results[dataset], column)
-      let output = results[dataset][crate][column]
-      if (output === undefined) {
-        row += ` ${footnote} |`
-      } else {
-        if (output.value != null) {
-          row += ` ${(min / output.value * 100).toFixed(2)}%`
-          hadResult = true
-        }
-        for (let variant in output.variants) {
-          row += ` *${(min / output.variants[variant].value * 100).toFixed(2)}%\\**`
-          hadResult = true
-        }
-        row += ' |'
-      }
-    }
-    if (hadResult) {
-      if (comparisonTable != '') {
-        comparisonTable += '\n'
-      }
-      comparisonTable += `${row}`
-    }
-  }
-
-  return {
-    header,
-    data: dataTable,
-    comparison: comparisonTable
-  }
-}
-
-const DATASET_DESCRIPTIONS = {
-  log: 'This data set is composed of HTTP request logs that are small and contain many strings.',
-  mesh: 'This data set is a single mesh. The mesh contains an array of triangles, each of which has three vertices and a normal vector.',
-  mk48: 'This data set is composed of mk48.io game updates that contain data with many exploitable patterns and invariants.',
-  minecraft_savedata: 'This data set is composed of Minecraft player saves that contain highly structured data.'
-}
-
-function format(input, metadata) {
-  let bench_times_re = /^([a-z0-9_\-]+)\/([a-z_\-]+)\/([a-z\-]+)(?: \(([a-z \-+]*)\))?\W+time: \[\d+\.\d+ [µnm]s (\d+\.\d+ [µnm]s).*$/gm
-  let bench_sizes_re = /^([a-z0-9_\-]+)\/([a-z_\-]+)\/(size|zlib|zstd) (\d+)$/gm
-
-  let results = {}
-  let crates = {}
-  for (let match of input.matchAll(bench_times_re)) {
-    let dataset = match[1]
-    let crate = match[2]
-    let bench = match[3]
-    let variant = match[4]
-    let time = match[5]
-
-    if (results[dataset] === undefined) {
-      results[dataset] = {}
-    }
-
-    if (results[dataset][crate] === undefined) {
-      results[dataset][crate] = {}
-    }
-
-    if (results[dataset][crate][bench] === undefined) {
-      results[dataset][crate][bench] = {
-        display: null,
-        value: null,
-        variants: {}
-      }
-    }
-
-    let benchResults = results[dataset][crate][bench]
-
-    if (variant == null) {
-      benchResults.display = time
-      benchResults.value = parseTime(time)
-    } else {
-      benchResults.variants[variant] = {
-        display: time,
-        value: parseTime(time)
-      }
-    }
-
-    if (!(crate in crates)) {
-      crates[crate] = get_crate_version(crate, metadata)
-    }
-  }
-
-  for (let match of input.matchAll(bench_sizes_re)) {
-    let dataset = match[1]
-    let crate = match[2]
-    let bench = match[3]
-    let size = match[4]
-
-    results[dataset][crate][bench] = {
-      display: Number(size),
-      value: Number(size)
-    }
-  }
-
-  let output = ''
-  for (let dataset in results) {
-    let serdeTables = buildTables(results, dataset, crates, ['serialize', 'deserialize', 'size', 'zlib', 'zstd'], '†')
-    let zcdTables = buildTables(results, dataset, crates, ['access', 'read', 'update'], '‡')
-
-    output += `\
-## \`${dataset}\`
-
-${DATASET_DESCRIPTIONS[dataset] || 'Missing dataset description'}
-
-### Raw Data
-
-For operations, time per iteration; for size, bytes. Lower is better.
-
-#### Serialize / deserialize speed and size
-
-${serdeTables.header}
-${serdeTables.data}
-
-#### Zero-copy deserialization speed
-
-${zcdTables.header}
-${zcdTables.data}
-
-### Comparison
-
-Relative to best. Higher is better.
-
-#### Serialize / deserialize speed and size
-
-${serdeTables.header}
-${serdeTables.comparison}
-
-#### Zero-copy deserialization speed
-
-${zcdTables.header}
-${zcdTables.comparison}
-
-`
-  }
-
-  for (let crate in crates) {
-    output += `[${crate}]: https://crates.io/crates/${crate}/${crates[crate]}\n`
-  }
-  output += `\n\n`
-
-  return output
-}
-
-function get_crate_version(crate, metadata) {
-  for (const pkg in metadata.packages) {
-    let pkg_data = metadata.packages[pkg]
-    if (pkg_data.name == crate) {
-      return pkg_data.version
-    }
-  }
-  throw new Error(`Failed to find a crate version for ${crate}`)
-}
-
-main()
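One behavioral note on the migration: the deleted `formatTemplate` used `String.prototype.replace` with a string pattern, which substitutes only the first occurrence of each `${...}` key, while the new formatter's `str::replace` substitutes every occurrence of a placeholder. The template currently uses each key once, so the outputs match, but the Rust behavior is the more forgiving of the two. A minimal sketch:

```rust
#[test]
fn replace_is_global() {
    // str::replace rewrites every occurrence of the placeholder, not just the
    // first one as JS String.replace with a string pattern does.
    assert_eq!(
        "{date} and again {date}".replace("{date}", "2023-7-6"),
        "2023-7-6 and again 2023-7-6"
    );
}
```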