[no-bench] Add interactive website via Github Pages (#46)
* Add github pages.

* Fix dist path.

* Remove dist path.

* Comment out benchmarks for now to speed up debugging.

* Use dist_prod instead of dist since dist is in .gitignore.

* [gh-actions] Update benchmarks

* Revert dist path to ../docs.

* Change step amount based on current value.

* [gh-actions] Update benchmarks

* Add deserialization and round trip benchmarks.

* [gh-actions] Update benchmarks

* Re-enable benchmarks.

* [gh-actions] Update benchmarks

* Add README.md to document interactive site.

* Fix compression/decompression time depending on compressed size instead of uncompressed size.

* Use json file instead of README.

* Use latest json file.

* Use stylist 0.12.1 since it works.

* Exclude crates without deserialize from deserialize/round trip.

* Use peaceiris/actions-gh-pages@v3 and add [no-bench].

* [no-bench] Remove newline from diff with master.

* Add pages package to workspace

Also clean up workspace dependencies and add optimization flags for wasm

* [gh-actions] Update benchmarks

* [no-bench] Put release profile in config.toml to avoid issues with workspace.

* [no-bench] Delete extra benchmarks generated during pr.

* [no-bench] Revert unintentional changes to generated files.

* [no-bench] Revert README.md changes resulting from extra benchmarks generated during pr.

* [no-bench] Use workspace dependency for web-sys.

* [no-bench] Add link to interactive site and its documentation to README.

---------

Co-authored-by: github-actions <github-actions@github.com>
Co-authored-by: David Koloski <djkoloski@gmail.com>
3 people authored Jul 14, 2023
1 parent b961ca2 commit b0ae613
Showing 20 changed files with 845 additions and 8 deletions.
21 changes: 20 additions & 1 deletion .github/workflows/bench.yml
@@ -25,7 +25,7 @@ jobs:
        with:
          fetch-depth: 1
          submodules: true

      - uses: actions-rs/toolchain@v1
        with:
          toolchain: nightly
@@ -34,7 +34,16 @@ jobs:

      - uses: Swatinem/rust-cache@v2

      - name: Add wasm32 target
        run: rustup target add wasm32-unknown-unknown

      - name: Install Trunk
        uses: baptiste0928/cargo-install@v2
        with:
          crate: trunk

      - name: bench
        if: "!contains(github.event.head_commit.message, '[no-bench]')"
        shell: bash
        run: |
@@ -64,3 +73,13 @@ jobs:
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ github.ref }}

      - name: Build Pages
        shell: bash
        run: cd pages && trunk --config Trunk.prod.toml build --release --filehash=false && cd ..

      - name: Deploy Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./pages/dist
11 changes: 9 additions & 2 deletions Cargo.toml
@@ -1,5 +1,6 @@
[workspace]
members = [
    "pages",
    "tools/bencher",
    "tools/formatter",
    "tools/parser",
@@ -15,13 +16,19 @@ publish = false

[workspace.dependencies]
cargo_metadata = "0.15"
clap = { version = "4", features = ["derive"] }
clap = "4"
enum-iterator = "0.8"
fixed-map = { version = "0.9", default-features = false }
regex = "1.9"
schema = { path = "tools/schema" }
serde = { version = "1.0", features = ["derive"] }
serde = "1.0"
serde_json = "1.0"
stylist = { version = "0.12", default-features = false }
tempfile = "3.6"
time = "0.3"
wasm-bindgen = "0.2"
web-sys = "0.3.60"
yew = { version = "0.20", default-features = false }

[package]
name = "rust_serialization_benchmark"
5 changes: 5 additions & 0 deletions README.md
@@ -10,6 +10,11 @@ serialization frameworks.

These benchmarks are still being developed and pull requests to improve benchmarks are welcome.

## [Interactive site](https://djkoloski.github.io/rust_serialization_benchmark/)

Calculate the number of messages per second that can be sent/received with various Rust serialization frameworks and compression libraries.
[Documentation](pages/README.md)

## Format

All tests benchmark the following properties (time or size):
10 changes: 10 additions & 0 deletions pages/.cargo/config.toml
@@ -0,0 +1,10 @@
[profile.release]
codegen-units = 1
lto = "fat"
opt-level = 'z'
panic = "abort"
strip = "debuginfo"

[unstable]
build-std = ["std", "panic_abort"]
build-std-features = ["panic_immediate_abort"]
5 changes: 5 additions & 0 deletions pages/.gitignore
@@ -0,0 +1,5 @@
Cargo.lock
/target
/dist
.idea
src/latest.json
30 changes: 30 additions & 0 deletions pages/Cargo.toml
@@ -0,0 +1,30 @@
[package]
name = "pages"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
publish.workspace = true

[dependencies]
enum-iterator.workspace = true
fixed-map.workspace = true
schema.workspace = true
serde_json.workspace = true
stylist = { workspace = true, features = ["macros", "yew_integration"] }
wasm-bindgen.workspace = true
yew = { workspace = true, features = ["csr"] }

[dependencies.web-sys]
workspace = true
features = [
    'HtmlInputElement',
    'HtmlSelectElement',
]

[profile.release]
codegen-units = 1
lto = true
opt-level = "z"
panic = "abort"
strip = "debuginfo"
31 changes: 31 additions & 0 deletions pages/README.md
@@ -0,0 +1,31 @@
# Rust Serialization Benchmark Interactive Site

## Inputs

* Bandwidth: available bandwidth in terabytes per month. 1 TB/Mo is about 0.38 megabytes per second, or 3.04 megabits per second (see the sketch after this list)
* CPU: the fraction of the CPU the benchmarks were run on that is available for use (values > 1 assume zero overhead for parallelization)
* Dataset (see ../README.md): changes the messages/s unit to e.g. logs/s
    * log: logs (benchmark size divided by 10000, the number of individual logs in the benchmark)
    * mesh: meshes (benchmark size)
    * minecraft_savedata: saves (benchmark size divided by 500, the number of individual player saves in the benchmark)
    * mk48: updates (benchmark size divided by 1000, the number of individual updates in the benchmark)
* Mode:
    * serialize: Bandwidth usage is the size of the compressed data; CPU usage is serialization + compression
    * deserialize: Bandwidth usage is the size of the compressed data; CPU usage is decompression + deserialization (crates without deserialize are excluded)
    * round trip: Bandwidth/CPU usage is the sum of the serialize and deserialize modes (crates without deserialize are excluded)
* zlib: allow using zlib as Compression
* zstd: allow using zstd as Compression
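
As a rough sketch of that bandwidth conversion (an illustrative snippet, not part of the site, assuming a 30.4-day month as the site does):

```rust
/// Bytes per second available for a bandwidth given in terabytes per month,
/// assuming a 30.4-day month.
fn bytes_per_second(terabytes_per_month: f64) -> f64 {
    terabytes_per_month * 1.0e12 / (30.4 * 24.0 * 60.0 * 60.0)
}

fn main() {
    // 1 TB/Mo ≈ 380,726 B/s ≈ 0.38 MB/s.
    println!("{:.0} bytes/s", bytes_per_second(1.0));
}
```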

## Outputs

* Crate: which crate is being used for serialization/deserialization
* Compression: which compression algorithm is deemed best (most messages/s) for that crate
* messages/s: how many messages could theoretically be sent per second given the available Bandwidth/CPU, the compressed data size, and the serialization + compression time (a worked example follows this list)
* Relative: messages/s normalized to the fastest result
* Bottleneck: whether Bandwidth or CPU runs out first (limiting messages/s)
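
A minimal worked sketch of the serialize-mode calculation, using hypothetical compressed-size and timing values (the real numbers come from the latest benchmark results):

```rust
fn main() {
    // Hypothetical inputs for illustration only.
    let bandwidth = 380_726.0_f32; // bytes/s, i.e. 1 TB/Mo
    let cpus = 1.0_f32;
    let compressed_size = 250_000.0_f32; // bytes per benchmark dataset
    let serialize_plus_compress = 0.002_f32; // seconds per benchmark dataset
    let messages_per_dataset = 10_000.0_f32; // e.g. the `log` dataset

    // Datasets per second allowed by each resource.
    let bandwidth_limit = bandwidth / compressed_size;
    let cpu_limit = cpus / serialize_plus_compress;

    // The scarcer resource is the bottleneck.
    let (datasets_per_second, bottleneck) = if bandwidth_limit < cpu_limit {
        (bandwidth_limit, "Bandwidth")
    } else {
        (cpu_limit, "CPU")
    };

    println!(
        "{:.0} messages/s (bottleneck: {bottleneck})",
        datasets_per_second * messages_per_dataset
    );
}
```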

## Assumptions

* zlib/zstd have a constant speed regardless of Dataset (hopefully we can fix this)
* 1 message of size 1000 takes the same Bandwidth/CPU as 1000 messages of size 1
* The number of messages that need to be sent per second is constant (if all of each day's messages arrived within a 1-hour interval, your real CPU requirement would be 24x)
9 changes: 9 additions & 0 deletions pages/Trunk.prod.toml
@@ -0,0 +1,9 @@
[build]
# The index HTML file to drive the bundling process.
target = "index.html"

# Build in release mode.
release = true

# The public URL from which assets are to be served.
public_url = "/rust_serialization_benchmark/"
61 changes: 61 additions & 0 deletions pages/build.rs
@@ -0,0 +1,61 @@
use std::fs;
use std::path::Path;
use std::str::FromStr;

fn main() {
    let path = "../benchmark_results";
    println!("cargo:rerun-if-changed={path}");

    let (_, from) = fs::read_dir(path)
        .unwrap()
        .filter_map(|path| {
            let path = path.unwrap().path();
            let time = parse_path(&path)?;
            Some((time, path))
        })
        .max_by_key(|(time, _)| *time)
        .expect("no benchmark results found");

    let to = Path::new("src/latest.json");
    fs::copy(&from, to).unwrap();
}

#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
struct DateTime {
    year: u16,
    month: u8,
    day: u8,
    hour: u8,
    minute: u8,
    second: u8,
}

fn parse_path(path: &Path) -> Option<DateTime> {
    let s = path.file_name()?.to_str()?;

    // {year}-{month}-{day}_{hour}-{minute}-{second}.json
    let (year, s) = s.split_once('-')?;
    let year = u16::from_str(year).ok()?;
    let (month, s) = s.split_once('-')?;
    let month = u8::from_str(month).ok()?;
    let (day, s) = s.split_once('_')?;
    let day = u8::from_str(day).ok()?;
    let (hour, s) = s.split_once('-')?;
    let hour = u8::from_str(hour).ok()?;
    let (minute, s) = s.split_once('-')?;
    let minute = u8::from_str(minute).ok()?;
    let (second, ext) = s.split_once('.')?;
    let second = u8::from_str(second).ok()?;
    if ext != "json" {
        return None;
    }

    Some(DateTime {
        year,
        month,
        day,
        hour,
        minute,
        second,
    })
}
12 changes: 12 additions & 0 deletions pages/index.html
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Rust Serialization Benchmark</title>
    <meta property="og:type" content="website"/>
    <link data-trunk rel="rust" data-wasm-opt="z" data-no-demangle/>
</head>
<body style="background-color: #2c3e50;">

</body>
</html>
114 changes: 114 additions & 0 deletions pages/src/calc.rs
@@ -0,0 +1,114 @@
use crate::compression::{Compression, CompressionSet};
use crate::mode::Mode;
use crate::row::Row;
use std::collections::HashSet;
use std::fmt::{Display, Formatter};

#[derive(Copy, Clone)]
pub enum Bottleneck {
    Bandwidth,
    Cpu,
}

impl Display for Bottleneck {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{}",
            match self {
                Self::Bandwidth => "bandwidth",
                Self::Cpu => "CPU",
            }
        )
    }
}

pub struct CalcRow {
    pub compression: Compression,
    pub crate_: String,
    pub limit: Bottleneck,
    pub messages_per_second: f32,
    pub relative: f32,
}

pub fn calc(
    rows: Vec<Row>,
    messages_per_benchmark: u32,
    bandwidth: u64,
    cpus: f32,
    compression_set: &CompressionSet,
    mode: Mode,
) -> Vec<CalcRow> {
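    // `bandwidth` is assumed to be in bytes per month here; dividing by the number of
    // seconds in a 30.4-day month converts it to bytes per second.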
    let bandwidth_per_second = bandwidth as f32 * (1.0 / (30.4 * 24.0 * 60.0 * 60.0));

    let mut rows: Vec<_> = rows
        .into_iter()
        .filter_map(|r| {
            r.deserialize
                .or_else(|| (mode == Mode::Serialize).then_some(0.0))
                .map(|d| (r, d))
        })
        .flat_map(|(r, deserialize)| {
            let Row {
                crate_,
                serialize,
                sizes,
                ..
            } = r;
            let uncompressed_size = *sizes.get(Compression::None).unwrap();

            sizes
                .into_iter()
                .filter(|(c, _)| compression_set.contains(*c))
                .map(move |(compression, compressed_size)| {
                    // TODO this assumes that inbound bandwidth is equivalent to outbound bandwidth which isn't the case for many VPS.
                    let limit_size = bandwidth_per_second
                        / (compressed_size * if mode == Mode::RoundTrip { 2 } else { 1 }) as f32;

                    let serialize_seconds =
                        serialize + compression.serialize_seconds(uncompressed_size);
                    let deserialize_seconds =
                        deserialize + compression.deserialize_seconds(uncompressed_size);
                    let limit_speed = cpus
                        / match mode {
                            Mode::Serialize => serialize_seconds,
                            Mode::Deserialize => deserialize_seconds,
                            Mode::RoundTrip => serialize_seconds + deserialize_seconds,
                        };

                    let (benchmarks_per_second, limit) = if limit_size < limit_speed {
                        (limit_size, Bottleneck::Bandwidth)
                    } else {
                        (limit_speed, Bottleneck::Cpu)
                    };
                    CalcRow {
                        compression,
                        crate_: crate_.clone(),
                        limit,
                        messages_per_second: benchmarks_per_second * messages_per_benchmark as f32,
                        relative: 0.0,
                    }
                })
        })
        .collect();

    rows.sort_by(|a, b| {
        b.messages_per_second
            .partial_cmp(&a.messages_per_second)
            .unwrap()
    });

    let max = rows
        .iter()
        .map(|r| r.messages_per_second)
        .fold(0.0, f32::max);
    for row in &mut rows {
        row.relative = row.messages_per_second / max;
    }

    // Dedup crates.
    let mut seen = HashSet::new();
    rows.retain(|r| seen.insert(r.crate_.clone()));

    rows
}