From f61c3d4e4d3925192445b607b46a1e0510a042eb Mon Sep 17 00:00:00 2001
From: Arpad Borsos
Date: Wed, 4 Sep 2024 09:18:59 +0200
Subject: [PATCH] Add benchmark for `chunks` parsing (#30)

---
 .gitattributes               |   1 -
 core/benches/pyreport.rs     | 112 +++++++++++++++++++++++++++--------
 core/fixtures/.gitattributes |   1 +
 3 files changed, 89 insertions(+), 25 deletions(-)
 delete mode 100644 .gitattributes
 create mode 100644 core/fixtures/.gitattributes

diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index d962435..0000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-fixtures/**/large/* filter=lfs diff=lfs merge=lfs -text
diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs
index b5ae90e..86e6623 100644
--- a/core/benches/pyreport.rs
+++ b/core/benches/pyreport.rs
@@ -1,45 +1,109 @@
-use codecov_rs::{parsers::pyreport::report_json, report::test::TestReportBuilder};
+use std::collections::HashMap;
+
+use codecov_rs::{
+    parsers::pyreport::{chunks, report_json},
+    report::test::{TestReport, TestReportBuilder},
+};
 use divan::Bencher;
+use winnow::Parser as _;
+
+// #[global_allocator]
+// static ALLOC: divan::AllocProfiler = divan::AllocProfiler::system();
 
 fn main() {
     divan::main();
 }
 
 #[divan::bench]
-fn simple_report() {
-    let reports = &[
-        &br#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#[..],
-        &br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#[..],
-        &br#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#[..],
-        &br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#[..],
-        &br#"{"files": {}, "sessions": {}}"#[..],
+fn simple_report_json() {
+    let reports: &[&[u8]] = &[
+        br#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#,
+        br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#,
+        br#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#,
+        br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#,
+        br#"{"files": {}, "sessions": {}}"#,
     ];
 
     for input in reports {
-        run_parsing(input);
+        parse_report_json(input);
     }
 }
 
-// parsing this is quite slow
 #[divan::bench]
-fn complex_report(bencher: Bencher) {
+fn complex_report_json(bencher: Bencher) {
     // this is a ~11M `report_json`
-    let path =
-        "./fixtures/pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json";
-    let Ok(report) = std::fs::read(path) else {
-        println!("Failed to read test report");
-        return;
-    };
+    let report = load_fixture(
+        "pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
+    );
+
+    bencher.bench(|| parse_report_json(&report));
+}
+
+fn parse_report_json(input: &[u8]) -> report_json::ParsedReportJson {
+    let mut report_builder = TestReportBuilder::default();
+    report_json::parse_report_json(input, &mut report_builder).unwrap()
+}
+
+#[divan::bench]
+fn simple_chunks() {
+    let chunks = &[
+        // Header and one chunk with an empty line
+        "{}\n<<<<< end_of_header >>>>>\n{}\n",
+        // No header, one chunk with a populated line and an empty line
+        "{}\n[1, null, [[0, 1]]]\n",
+        // No header, two chunks, the second having just one empty line
+        "{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
+        // Header, two chunks, the second having multiple data lines and an empty line
+        "{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
+    ];
+
+    let files = HashMap::from([(0, 0), (1, 1), (2, 2)]);
+    let sessions = HashMap::from([(0, 0), (1, 1), (2, 2)]);
 
-    if report.starts_with(b"version https://git-lfs.github.com/spec/v1\n") {
-        println!("Sample report has not been pulled from Git LFS");
-        return;
+    for input in chunks {
+        parse_chunks_file(input, files.clone(), sessions.clone())
     }
+}
+
+// just 1 iteration, as this is currently ~4 seconds on my machine
+#[divan::bench(sample_count = 1)]
+fn complex_chunks(bencher: Bencher) {
+    // this is a ~96M `chunks` file
+    let chunks =
+        load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");
+    let chunks = std::str::from_utf8(&chunks).unwrap();
+
+    // parsing the chunks depends on having loaded the `report_json`
+    let report = load_fixture(
+        "pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
+    );
+    let report_json::ParsedReportJson { files, sessions } = parse_report_json(&report);
 
-    bencher.bench(|| run_parsing(&report));
+    bencher.bench(|| parse_chunks_file(chunks, files.clone(), sessions.clone()));
 }
 
-fn run_parsing(input: &[u8]) {
-    let mut report_builder = TestReportBuilder::default();
-    report_json::parse_report_json(input, &mut report_builder).unwrap();
+fn parse_chunks_file(input: &str, files: HashMap<usize, i64>, sessions: HashMap<usize, i64>) {
+    let report_builder = TestReportBuilder::default();
+
+    let chunks_ctx = chunks::ParseCtx::new(report_builder, files, sessions);
+    let mut chunks_stream = chunks::ReportOutputStream::<&str, TestReport, TestReportBuilder> {
+        input,
+        state: chunks_ctx,
+    };
+
+    chunks::parse_chunks_file
+        .parse_next(&mut chunks_stream)
+        .unwrap();
+}
+
+#[track_caller]
+fn load_fixture(path: &str) -> Vec<u8> {
+    let path = format!("./fixtures/{path}");
+    let contents = std::fs::read(path).unwrap();
+
+    if contents.starts_with(b"version https://git-lfs.github.com/spec/v1") {
+        panic!("Fixture has not been pulled from Git LFS");
+    }
+
+    contents
 }
diff --git a/core/fixtures/.gitattributes b/core/fixtures/.gitattributes
new file mode 100644
index 0000000..fa4a173
--- /dev/null
+++ b/core/fixtures/.gitattributes
@@ -0,0 +1 @@
+**/large/* filter=lfs diff=lfs merge=lfs -text