From 331ce5a14f3a896212b74cda928a2d4ecdc75554 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 31 Jul 2024 14:12:29 +0200 Subject: [PATCH] Parse `report_json` files with serde This adds a bunch of Struct definitions along with deriving `serde::Deserialize`. --- Cargo.lock | 11 ++ Cargo.toml | 2 + benches/pyreport.rs | 400 +++++++++++++++++++++++++++----------------- 3 files changed, 259 insertions(+), 154 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7321875..b96aa8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,7 +83,9 @@ dependencies = [ "rusqlite", "rusqlite_migration", "seahash", + "serde", "serde_json", + "smol_buf", "strum", "strum_macros", "tempfile", @@ -493,6 +495,15 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "smol_buf" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "766eb91ad12453fe3d07c1a6c4664136b6f234e00fd924afd9e4ddf098c754bd" +dependencies = [ + "serde", +] + [[package]] name = "strum" version = "0.26.3" diff --git a/Cargo.toml b/Cargo.toml index 7684ce9..a2d9f49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,8 @@ thiserror = "1.0.59" winnow = "0.5.34" serde_json = "1.0.117" +serde = { version = "1.0.204", features = ["derive"] } +smol_buf = { version = "0.1.0", features = ["serde"] } [dev-dependencies] divan = "0.1.14" diff --git a/benches/pyreport.rs b/benches/pyreport.rs index 031e053..d66fbbc 100644 --- a/benches/pyreport.rs +++ b/benches/pyreport.rs @@ -1,198 +1,290 @@ -use codecov_rs::{ - error::Result, - parsers::{common::ReportBuilderCtx, pyreport::report_json}, - report::{models, Report, ReportBuilder}, -}; -use divan::Bencher; -use winnow::Parser as _; +use divan::{AllocProfiler, Bencher}; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); fn main() { divan::main(); } -#[divan::bench] -fn simple_report() { - let reports = &[ - r#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#, - r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, - r#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, - r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#, - r#"{"files": {}, "sessions": {}}"#, - ]; - - for input in reports { - run_parsing(input); - } -} - -// parsing this is quite slow -#[divan::bench(sample_count = 10)] -fn complex_report(bencher: Bencher) { - // this is a ~11M `report_json` - let path = "./reports/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json"; - if let Ok(report) = std::fs::read_to_string(path) { - bencher.bench(|| run_parsing(&report)); - } -} +static SIMPLE_REPORTS: &[&str] = &[ + r#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#, + r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, + r#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, + r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#, + r#"{"files": {}, "sessions": {}}"#, +]; -fn run_parsing(input: &str) { - let report_builder = TestReport::default(); - let mut stream = report_json::ReportOutputStream::<&str, TestReport, TestReport> { - input, - state: ReportBuilderCtx::new(report_builder), - }; - report_json::parse_report_json - .parse_next(&mut stream) - .unwrap(); -} +mod serde { + #![allow(unused)] + use std::collections::HashMap; -#[derive(Debug, Default)] -struct TestReport { - files: Vec, - uploads: Vec, -} + use ::serde::{de::IgnoredAny, Deserialize}; + use serde_json::Value; + use smol_buf::Str24; -impl Report for TestReport { - fn list_files(&self) -> Result> { - todo!() - } + use super::*; - fn list_contexts(&self) -> Result> { - todo!() + #[divan::bench] + fn simple_report() { + for input in SIMPLE_REPORTS { + let _report: Report = serde_json::from_str(input).unwrap(); + } } - fn list_coverage_samples(&self) -> Result> { - todo!() + // parsing this is quite slow + #[divan::bench(sample_count = 10)] + fn complex_report(bencher: Bencher) { + // this is a ~11M `report_json` + let path = "./reports/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json"; + if let Ok(report) = std::fs::read(path) { + bencher.bench(|| -> Report { serde_json::from_slice(&report).unwrap() }); + } } - fn list_branches_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() + #[derive(Debug, Deserialize)] + struct Report { + files: HashMap, + sessions: HashMap, } - fn get_method_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } + #[derive(Debug, Deserialize)] + // this really is: + // - index in chunks + // - file totals + // - session totals + // - diff totals + struct File(usize, IgnoredAny, IgnoredAny, IgnoredAny); - fn list_spans_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() + #[derive(Debug, Deserialize)] + struct Session { + #[serde(rename = "d")] + timestamp: Option, + #[serde(rename = "a")] + raw_upload_url: Option, + #[serde(rename = "f", default)] + flags: Vec, + #[serde(rename = "c")] + provider: Option, + #[serde(rename = "n")] + build: Option, + #[serde(rename = "N")] + name: Option, + #[serde(rename = "j")] + job_name: Option, + #[serde(rename = "u")] + ci_run_url: Option, + #[serde(rename = "p")] + state: Option, + #[serde(rename = "e")] + env: Option, + #[serde(rename = "st")] + session_type: Option, + #[serde(rename = "se")] + session_extras: Option, } +} - fn list_contexts_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } +mod winnow { + use std::collections::HashMap; - fn list_samples_for_file( - &self, - _file: &models::SourceFile, - ) -> Result> { - todo!() - } + use ::winnow::Parser as _; + use codecov_rs::{ + error::Result, + parsers::{common::ReportBuilderCtx, pyreport::report_json}, + report::{models, Report, ReportBuilder}, + }; - fn list_raw_uploads(&self) -> Result> { - todo!() - } + use super::*; - fn merge(&mut self, _other: &Self) -> Result<()> { - todo!() + #[divan::bench] + fn simple_report() { + for input in SIMPLE_REPORTS { + run_parsing(input); + } } - fn totals(&self) -> Result { - todo!() + // parsing this is quite slow + #[divan::bench(sample_count = 10)] + fn complex_report(bencher: Bencher) { + // this is a ~11M `report_json` + let path = "./reports/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json"; + if let Ok(report) = std::fs::read_to_string(path) { + bencher.bench(|| run_parsing(&report)); + } } -} -impl ReportBuilder for TestReport { - fn insert_file(&mut self, path: String) -> Result { - let file = models::SourceFile { - id: seahash::hash(path.as_bytes()) as i64, - path, + fn run_parsing(input: &str) -> (HashMap, HashMap) { + let report_builder = TestReport::default(); + let mut stream = report_json::ReportOutputStream::<&str, TestReport, TestReport> { + input, + state: ReportBuilderCtx::new(report_builder), }; - self.files.push(file.clone()); - Ok(file) + report_json::parse_report_json + .parse_next(&mut stream) + .unwrap() } - fn insert_raw_upload( - &mut self, - mut upload_details: models::RawUpload, - ) -> Result { - upload_details.id = self.uploads.len() as i64; - self.uploads.push(upload_details.clone()); - Ok(upload_details) + #[derive(Debug, Default)] + struct TestReport { + num_files: i64, + num_uploads: i64, } - fn insert_context( - &mut self, - _context_type: models::ContextType, - _name: &str, - ) -> Result { - todo!() - } + impl Report for TestReport { + fn list_files(&self) -> Result> { + todo!() + } - fn insert_coverage_sample( - &mut self, - _sample: models::CoverageSample, - ) -> Result { - todo!() - } + fn list_contexts(&self) -> Result> { + todo!() + } - fn multi_insert_coverage_sample( - &mut self, - _samples: Vec<&mut models::CoverageSample>, - ) -> Result<()> { - todo!() - } + fn list_coverage_samples(&self) -> Result> { + todo!() + } - fn insert_branches_data( - &mut self, - _branch: models::BranchesData, - ) -> Result { - todo!() - } + fn list_branches_for_sample( + &self, + _sample: &models::CoverageSample, + ) -> Result> { + todo!() + } - fn multi_insert_branches_data( - &mut self, - _branches: Vec<&mut models::BranchesData>, - ) -> Result<()> { - todo!() - } + fn get_method_for_sample( + &self, + _sample: &models::CoverageSample, + ) -> Result> { + todo!() + } - fn insert_method_data(&mut self, _method: models::MethodData) -> Result { - todo!() - } + fn list_spans_for_sample( + &self, + _sample: &models::CoverageSample, + ) -> Result> { + todo!() + } - fn multi_insert_method_data(&mut self, _methods: Vec<&mut models::MethodData>) -> Result<()> { - todo!() - } + fn list_contexts_for_sample( + &self, + _sample: &models::CoverageSample, + ) -> Result> { + todo!() + } - fn insert_span_data(&mut self, _span: models::SpanData) -> Result { - todo!() - } + fn list_samples_for_file( + &self, + _file: &models::SourceFile, + ) -> Result> { + todo!() + } - fn multi_insert_span_data(&mut self, _spans: Vec<&mut models::SpanData>) -> Result<()> { - todo!() - } + fn list_raw_uploads(&self) -> Result> { + todo!() + } - fn associate_context(&mut self, _assoc: models::ContextAssoc) -> Result { - todo!() - } + fn merge(&mut self, _other: &Self) -> Result<()> { + todo!() + } - fn multi_associate_context(&mut self, _assocs: Vec<&mut models::ContextAssoc>) -> Result<()> { - todo!() + fn totals(&self) -> Result { + todo!() + } } - fn build(self) -> Result { - Ok(self) + impl ReportBuilder for TestReport { + fn insert_file(&mut self, path: String) -> Result { + let file = models::SourceFile { + id: self.num_files, + path, + }; + self.num_files += 1; + Ok(file) + } + + fn insert_raw_upload( + &mut self, + mut upload_details: models::RawUpload, + ) -> Result { + upload_details.id = self.num_uploads; + self.num_uploads += 1; + Ok(upload_details) + } + + fn insert_context( + &mut self, + _context_type: models::ContextType, + _name: &str, + ) -> Result { + todo!() + } + + fn insert_coverage_sample( + &mut self, + _sample: models::CoverageSample, + ) -> Result { + todo!() + } + + fn multi_insert_coverage_sample( + &mut self, + _samples: Vec<&mut models::CoverageSample>, + ) -> Result<()> { + todo!() + } + + fn insert_branches_data( + &mut self, + _branch: models::BranchesData, + ) -> Result { + todo!() + } + + fn multi_insert_branches_data( + &mut self, + _branches: Vec<&mut models::BranchesData>, + ) -> Result<()> { + todo!() + } + + fn insert_method_data( + &mut self, + _method: models::MethodData, + ) -> Result { + todo!() + } + + fn multi_insert_method_data( + &mut self, + _methods: Vec<&mut models::MethodData>, + ) -> Result<()> { + todo!() + } + + fn insert_span_data(&mut self, _span: models::SpanData) -> Result { + todo!() + } + + fn multi_insert_span_data(&mut self, _spans: Vec<&mut models::SpanData>) -> Result<()> { + todo!() + } + + fn associate_context( + &mut self, + _assoc: models::ContextAssoc, + ) -> Result { + todo!() + } + + fn multi_associate_context( + &mut self, + _assocs: Vec<&mut models::ContextAssoc>, + ) -> Result<()> { + todo!() + } + + fn build(self) -> Result { + Ok(self) + } } }