Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start parsing the chunks file with serde #31

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ testing = []

[dependencies]
include_dir = "0.7.3"
memchr = "2.7.4"
memmap2 = "0.9.4"
rand = "0.8.5"
rusqlite = { version = "0.31.0", features = ["bundled", "limits", "serde_json"] }
Expand Down
60 changes: 58 additions & 2 deletions core/benches/pyreport.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::collections::HashMap;

use codecov_rs::{
parsers::pyreport::{chunks, report_json},
parsers::pyreport::{chunks, chunks_serde, report_json},
report::test::{TestReport, TestReportBuilder},
};
use divan::Bencher;
Expand Down Expand Up @@ -53,7 +53,7 @@ fn simple_chunks() {
let chunks = &[
// Header and one chunk with an empty line
"{}\n<<<<< end_of_header >>>>>\n{}\n",
// No header, one chunk with a populated line and an empty line
// No header, one chunk with a populated line and an empty line
"{}\n[1, null, [[0, 1]]]\n",
// No header, two chunks, the second having just one empty line
"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
Expand Down Expand Up @@ -106,3 +106,59 @@ fn parse_chunks_file(input: &str, files: HashMap<usize, i64>, sessions: HashMap<
.parse_next(&mut chunks_stream)
.unwrap();
}

// Benchmarks the serde-based chunks parser on a few tiny, hand-written inputs.
#[divan::bench]
fn simple_chunks_serde() {
    // The report JSON is built with its file and session maps left at their defaults.
    let empty_report = report_json::ParsedReportJson {
        files: Default::default(),
        sessions: Default::default(),
    };

    let inputs: [&[u8]; 4] = [
        // Header and one chunk with an empty line
        b"{}\n<<<<< end_of_header >>>>>\n{}\n",
        // No header, one chunk with a populated line and an empty line
        b"{}\n[1, null, [[0, 1]]]\n",
        // No header, two chunks, the second having just one empty line
        b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
        // Header, two chunks, the second having multiple data lines and an empty line
        b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
    ];

    // Parse every input in declaration order against the same report JSON.
    inputs
        .iter()
        .copied()
        .for_each(|bytes| parse_chunks_file_serde(bytes, &empty_report));
}

// Benchmarks the serde-based chunks parser on a large real-world fixture.
// this is currently <300 ms on my machine
#[divan::bench(sample_count = 10)]
fn complex_chunks_serde(bencher: Bencher) {
    // this is a ~96M `chunks` file
    let chunks_bytes =
        load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");

    // the chunks parser needs the already-parsed `report_json`, so load and parse
    // it up front, outside of the measured closure
    let report_bytes = load_fixture(
        "pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
    );
    let parsed_report = parse_report_json(&report_bytes);

    // only the chunks parsing itself is measured
    bencher.bench(|| {
        parse_chunks_file_serde(&chunks_bytes, &parsed_report)
    });
}

// Feeds `input` through `chunks_serde::parse_chunks_file` using a fresh
// `TestReportBuilder`, panicking if the parser returns an error.
fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) {
    let mut builder = TestReportBuilder::default();
    let outcome = chunks_serde::parse_chunks_file(input, report_json, &mut builder);
    // A parse failure is a bug in the benchmark setup, so fail loudly.
    outcome.unwrap();
}

/// Reads the fixture at `./fixtures/{path}` (relative to the current working
/// directory) and returns its raw bytes.
///
/// # Panics
///
/// Panics, naming the resolved path, if the file cannot be read or if its
/// contents start with the Git LFS pointer header, meaning the real fixture
/// has not been pulled from Git LFS.
#[track_caller]
fn load_fixture(path: &str) -> Vec<u8> {
    let path = format!("./fixtures/{path}");

    // `match` + `panic!` (rather than `unwrap_or_else` with a closure) keeps the
    // panic inside this `#[track_caller]` function, so the reported location is
    // still the caller's, while the message now says which fixture failed.
    let contents = match std::fs::read(&path) {
        Ok(bytes) => bytes,
        Err(err) => panic!("failed to read fixture `{path}`: {err}"),
    };

    // A file that has not been pulled from LFS is a small text pointer that
    // begins with this header instead of the real fixture data.
    if contents.starts_with(b"version https://git-lfs.github.com/spec/v1") {
        panic!("Fixture has not been pulled from Git LFS: `{path}`");
    }

    contents
}
5 changes: 5 additions & 0 deletions core/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use thiserror::Error;

use crate::parsers::pyreport::chunks_serde::ChunksFileParseError;

pub type Result<T, E = CodecovError> = std::result::Result<T, E>;

#[derive(Error, Debug)]
Expand All @@ -26,4 +28,7 @@ pub enum CodecovError {
#[cfg(feature = "pyreport")]
#[error("failed to convert sqlite to pyreport: '{0}'")]
PyreportConversionError(String),

#[error(transparent)]
ChunksFileParseError(#[from] ChunksFileParseError),
}
Loading
Loading