From 5f3091af9ceef4b5f7310bd862556f1b6706f96c Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 30 Aug 2024 11:11:24 +0200 Subject: [PATCH] Parse `report_json` files with serde (#18) Replaces the hand-written `winnow`-based parser with a bunch of struct definitions along with deriving `serde::Deserialize`. --- Cargo.lock | 54 +- Cargo.toml | 1 - core/Cargo.toml | 3 + core/benches/pyreport.rs | 179 +--- core/src/error.rs | 3 + core/src/parsers/pyreport/mod.rs | 17 +- core/src/parsers/pyreport/report_json.rs | 1190 +++++++--------------- core/src/report/mod.rs | 2 +- core/tests/test_pyreport_shim.rs | 30 +- 9 files changed, 454 insertions(+), 1025 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3742f4b..23de815 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,9 +40,12 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cc" -version = "1.1.8" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" +checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" +dependencies = [ + "shlex", +] [[package]] name = "cfg-if" @@ -52,18 +55,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.5.13" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" dependencies = [ "anstyle", "clap_lex", @@ -87,6 +90,7 @@ dependencies = [ "rusqlite", "rusqlite_migration", "seahash", + "serde", "serde_json", "tempfile", "thiserror", @@ -156,9 +160,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "getrandom" @@ -228,9 +232,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libsqlite3-sys" @@ -380,9 +384,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -450,9 +454,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "a85d50532239da68e9addb745ba38ff4612a242c1c7ceea689c4bc7c2f43c36f" dependencies = [ "bitflags", "errno", @@ -475,18 +479,18 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.205" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.205" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", @@ -495,9 +499,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" dependencies = [ "itoa", "memchr", @@ -505,6 +509,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.13.2" @@ -513,9 +523,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "syn" -version = "2.0.72" +version = "2.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index b29b84d..3c9f71c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,5 +5,4 @@ members = ["bindings", "core"] default-members = ["core"] [profile.release] - debug = 1 diff --git a/core/Cargo.toml b/core/Cargo.toml index bbbce34..f47fb6b 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [features] default = ["pyreport"] pyreport = [] +testing = [] [dependencies] include_dir = "0.7.3" @@ -14,6 +15,7 @@ rand = "0.8.5" rusqlite = { version = "0.31.0", features = ["bundled", "limits"] } rusqlite_migration = { version = "1.2.0", features = ["from-directory"] } seahash = "4.1.0" +serde = { version = "1.0.204", features = ["derive"] } serde_json = "1.0.117" thiserror = "1.0.59" winnow = "0.5.34" @@ -25,3 +27,4 @@ tempfile = "3.9.0" [[bench]] name = "pyreport" harness = false +required-features = ["testing"] diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs index c0687e6..b5ae90e 100644 --- a/core/benches/pyreport.rs +++ b/core/benches/pyreport.rs @@ -1,10 +1,5 @@ -use codecov_rs::{ - error::Result, - parsers::{common::ReportBuilderCtx, pyreport::report_json}, - report::{models, Report, ReportBuilder}, -}; +use codecov_rs::{parsers::pyreport::report_json, report::test::TestReportBuilder}; use divan::Bencher; -use winnow::Parser as _; fn main() { divan::main(); @@ -13,11 +8,11 @@ fn main() { #[divan::bench] fn simple_report() { let reports = &[ - r#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": 
{"j": "codecov-rs CI"}}}"#, - r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, - r#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, - r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#, - r#"{"files": {}, "sessions": {}}"#, + &br#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#[..], + &br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#[..], + &br#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#[..], + &br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#[..], + &br#"{"files": {}, "sessions": {}}"#[..], ]; for input in reports { @@ -26,17 +21,17 @@ fn simple_report() { } // parsing this is quite slow -#[divan::bench(sample_count = 10)] +#[divan::bench] fn complex_report(bencher: Bencher) { // this is a ~11M `report_json` let path = "./fixtures/pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json"; - let Ok(report) = std::fs::read_to_string(path) else { + let Ok(report) = std::fs::read(path) else { println!("Failed to read test report"); return; }; - if report.starts_with("version https://git-lfs.github.com/spec/v1\n") { + if report.starts_with(b"version https://git-lfs.github.com/spec/v1\n") { println!("Sample report has not been pulled from Git LFS"); return; } @@ -44,157 +39,7 @@ fn complex_report(bencher: Bencher) { bencher.bench(|| run_parsing(&report)); } -fn run_parsing(input: &str) { - let report_builder = TestReport::default(); - let mut stream = report_json::ReportOutputStream::<&str, TestReport, TestReport> { - input, - state: ReportBuilderCtx::new(report_builder), - }; - report_json::parse_report_json - .parse_next(&mut stream) - .unwrap(); -} - -#[derive(Debug, Default)] -struct TestReport { - files: Vec, - uploads: Vec, -} - -impl Report for TestReport { - fn list_files(&self) -> Result> { - todo!() - } - - fn list_contexts(&self) -> Result> { - todo!() - } - - fn list_coverage_samples(&self) -> Result> { - todo!() - } - - fn list_branches_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn get_method_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn list_spans_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn list_contexts_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn list_samples_for_file( - &self, - _file: &models::SourceFile, - ) -> Result> { - todo!() - } - - fn list_raw_uploads(&self) -> Result> { - todo!() - } - - fn merge(&mut self, _other: &Self) -> Result<()> { - todo!() - } - - fn totals(&self) -> Result { - todo!() - } -} - -impl ReportBuilder for TestReport { - fn insert_file(&mut self, path: &str) -> Result { - let file = models::SourceFile::new(path); - self.files.push(file.clone()); - Ok(file) - } - - fn insert_raw_upload( - &mut self, - mut upload_details: models::RawUpload, - ) -> Result { - upload_details.id = self.uploads.len() as i64; - self.uploads.push(upload_details.clone()); - Ok(upload_details) - } - - fn insert_context(&mut self, _name: &str) -> Result { - 
todo!() - } - - fn insert_coverage_sample( - &mut self, - _sample: models::CoverageSample, - ) -> Result { - todo!() - } - - fn multi_insert_coverage_sample( - &mut self, - _samples: Vec<&mut models::CoverageSample>, - ) -> Result<()> { - todo!() - } - - fn insert_branches_data( - &mut self, - _branch: models::BranchesData, - ) -> Result { - todo!() - } - - fn multi_insert_branches_data( - &mut self, - _branches: Vec<&mut models::BranchesData>, - ) -> Result<()> { - todo!() - } - - fn insert_method_data(&mut self, _method: models::MethodData) -> Result { - todo!() - } - - fn multi_insert_method_data(&mut self, _methods: Vec<&mut models::MethodData>) -> Result<()> { - todo!() - } - - fn insert_span_data(&mut self, _span: models::SpanData) -> Result { - todo!() - } - - fn multi_insert_span_data(&mut self, _spans: Vec<&mut models::SpanData>) -> Result<()> { - todo!() - } - - fn associate_context(&mut self, _assoc: models::ContextAssoc) -> Result { - todo!() - } - - fn multi_associate_context(&mut self, _assocs: Vec<&mut models::ContextAssoc>) -> Result<()> { - todo!() - } - - fn build(self) -> Result { - Ok(self) - } +fn run_parsing(input: &[u8]) { + let mut report_builder = TestReportBuilder::default(); + report_json::parse_report_json(input, &mut report_builder).unwrap(); } diff --git a/core/src/error.rs b/core/src/error.rs index 5a551e4..5f793bb 100644 --- a/core/src/error.rs +++ b/core/src/error.rs @@ -17,6 +17,9 @@ pub enum CodecovError { #[error("parser error: '{0}'")] ParserError(winnow::error::ContextError), + #[error("parser error: '{0}'")] + Json(#[from] serde_json::Error), + #[error("io error: '{0}'")] IOError(#[from] std::io::Error), diff --git a/core/src/parsers/pyreport/mod.rs b/core/src/parsers/pyreport/mod.rs index bf015b3..fb3a9a0 100644 --- a/core/src/parsers/pyreport/mod.rs +++ b/core/src/parsers/pyreport/mod.rs @@ -3,7 +3,6 @@ use std::{fs::File, path::PathBuf}; use memmap2::Mmap; use winnow::Parser; -use super::common::ReportBuilderCtx; use crate::{ error::{CodecovError, Result}, report::{ReportBuilder, SqliteReport, SqliteReportBuilder, SqliteReportBuilderTx}, @@ -72,27 +71,19 @@ fn parse_pyreport_with_builder( // and prevent us from consuming `report_builder` to actually build a // `SqliteReport`. { - let report_builder_tx = report_builder.transaction()?; + let mut report_builder_tx = report_builder.transaction()?; // Memory-map the input file so we don't have to read the whole thing into RAM let mmap_handle = unsafe { Mmap::map(report_json_file)? }; - let buf = unsafe { std::str::from_utf8_unchecked(&mmap_handle[..]) }; - let mut stream = - report_json::ReportOutputStream::<&str, SqliteReport, SqliteReportBuilderTx> { - input: buf, - state: ReportBuilderCtx::new(report_builder_tx), - }; - let (files, sessions) = report_json::parse_report_json - .parse_next(&mut stream) - .map_err(|e| e.into_inner().unwrap_or_default()) - .map_err(CodecovError::ParserError)?; + let report_json::ParsedReportJson { files, sessions } = + report_json::parse_report_json(&mmap_handle, &mut report_builder_tx)?; // Replace our mmap handle so the first one can be unmapped let mmap_handle = unsafe { Mmap::map(chunks_file)? 
}; let buf = unsafe { std::str::from_utf8_unchecked(&mmap_handle[..]) }; // Move `report_builder` from the report JSON's parse context to this one - let chunks_ctx = chunks::ParseCtx::new(stream.state.report_builder, files, sessions); + let chunks_ctx = chunks::ParseCtx::new(report_builder_tx, files, sessions); let mut chunks_stream = chunks::ReportOutputStream::<&str, SqliteReport, SqliteReportBuilderTx> { input: buf, diff --git a/core/src/parsers/pyreport/report_json.rs b/core/src/parsers/pyreport/report_json.rs index 27ed265..4a03070 100644 --- a/core/src/parsers/pyreport/report_json.rs +++ b/core/src/parsers/pyreport/report_json.rs @@ -1,827 +1,411 @@ -use std::collections::HashMap; - -use winnow::{ - combinator::{cut_err, delimited, separated}, - error::{ContextError, ErrMode, ErrorKind, FromExternalError}, - PResult, Parser, Stateful, +//! Parses a "report JSON" object which contains information about the files and +//! "sessions" in a report. A session is more-or-less a single upload, and they +//! are represented in our schema as a "context" which may be tied to a line. +//! +//! At a high level, the format looks something like: +//! ```json +//! { +//! "files": { +//! "filename": ReportFileSummary, +//! ... +//! }, +//! "sessions": { +//! "session index": Session, +//! ... +//! } +//! } +//! ``` +//! +//! The types can only be completely understood by reading their implementations +//! in our Python code: +//! - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) +//! - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) +//! +//! ## Files +//! +//! The `files` are key-value pairs where the key is a filename and the value is +//! a `ReportFileSummary`. We primarily care about the chunks_index field and +//! can compute the totals on-demand later. +//! +//! The format is messy and can only be fully understood by reading the Python +//! source in our `shared` repository's +//! [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py) and +//! [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py). +//! Nevertheless, the common case will be described here. +//! +//! At a high level, the input looks like: +//! ```notrust +//! "filename.rs": [ +//! chunks_index: int, +//! file_totals: ReportTotals, +//! session_totals: null, // (formerly SessionTotalsArray, but ignored now) +//! diff_totals: ReportTotals (probably), +//! ] +//! ``` +//! with `int` being normal and the other types being from our Python code: +//! - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) +//! - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45) +//! - [`SessionTotalsArray`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L263-L272) +//! +//! `SessionTotalsArray` no longer exists, but older reports may still have it. +//! It's a dict mapping a session ID to a `SessionTotals` (which is just a type +//! alias for `ReportTotals`), plus a "meta" key with extra information such as +//! how many sessions there are in the map. +//! There's an even older format which is just a flat list. In any case, we 
ignore the field now. +//! +//! Input example: +//! ```json +//! "src/report.rs": [ +//! 0, # index in chunks +//! [ # file totals +//! 0, # > files +//! 45, # > lines +//! 45, # > hits +//! 0, # > misses +//! 0, # > partials +//! "100", # > coverage % +//! 0, # > branches +//! 0, # > methods +//! 0, # > messages +//! 0, # > sessions +//! 0, # > complexity +//! 0, # > complexity_total +//! 0 # > diff +//! ], +//! { # session totals (usually null nowadays) +//! "0": [ # > key: session id +//! 0, # > files +//! 45, # > lines +//! 45, # > hits +//! 0, # > misses +//! 0, # > partials +//! "100" # > coverage +//! ], +//! "meta": { +//! "session_count": 1 +//! } +//! }, +//! null # diff totals +//! ], +//! ``` +//! +//! ## Sessions +//! +//! The `sessions` are key-value pairs where the key is a session index and the +//! value is an encoded `Session`. A session is essentially just an upload. We can +//! compute session-specific coverage totals on-demand later and only care about +//! other details for now. +//! +//! The format is messy and can only be fully understood by reading the Python +//! source in our `shared` repository's +//! [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py), +//! [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py), +//! and [`shared/utils/sessions.py`](https://github.com/codecov/shared/blob/main/shared/utils/sessions.py). +//! Nevertheless, the common case will be described here. +//! +//! At a high level, the input looks like: +//! ```notrust +//! "session index": [ +//! "t": ReportTotals, # Coverage totals for this report +//! "d": int, # time +//! "a": str, # archive (URL of raw upload) +//! "f": list[str], # flags +//! "c": str, # provider +//! "n": str, # build +//! "N": str, # name +//! "j": str, # CI job name +//! "u": str, # CI job run URL +//! "p": str, # state +//! "e": str, # env +//! "st": str, # session type +//! "se": dict, # session extras +//! ] +//! ``` +//! with most types being normal and others coming from our Python code: +//! - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45). +//! - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) +//! +//! Input example: +//! ```notrust +//! "0": { # session index +//! "t": [ # session totals +//! 3, # files in session +//! 94, # lines +//! 52, # hits +//! 42, # misses +//! 0, # partials +//! "55.31915", # coverage % +//! 0, # branches +//! 0, # methods +//! 0, # messages +//! 0, # sessions +//! 0, # complexity +//! 0, # complexity_total +//! 0 # diff +//! ], +//! "d": 1704827412, # timestamp +//! # archive (raw upload URL) +//! "a": "v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt", +//! "f": [], # flags +//! "c": null, # provider +//! "n": null, # build +//! "N": null, # name +//! "j": "codecov-rs CI", # CI job name +//! # CI job run URL +//! "u": "https://github.com/codecov/codecov-rs/actions/runs/7465738121", +//! "p": null, # state +//! "e": null, # env +//! "st": "uploaded", # session type +//! "se": {} # session extras +//! } +//! 
``` + +use std::collections::{BTreeMap, HashMap}; + +use serde::{de::IgnoredAny, Deserialize}; +use serde_json::Value; + +use crate::{ + error::CodecovError, + report::{models, Report, ReportBuilder}, }; -use super::super::{ - common::{ - winnow::{ws, StrStream}, - ReportBuilderCtx, - }, - json::{parse_kv, specific_key, JsonVal}, -}; -use crate::report::{models, Report, ReportBuilder}; - -pub type ReportOutputStream = Stateful>; - -/// Parses a key-value pair where the key is a filename and the value is a -/// `ReportFileSummary`. We primarily care about the chunks_index field and can -/// compute the totals on-demand later. -/// -/// The format is messy and can only be fully understood by reading the Python -/// source in our `shared` repository's -/// [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py) and -/// [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py). -/// Nevertheless, the common case will be described here. -/// -/// At a high level, the input looks like: -/// ```notrust -/// "filename.rs": [ -/// chunks_index: int, -/// file_totals: ReportTotals, -/// session_totals: null, // (formerly SessionTotalsArray, but ignored now) -/// diff_totals: ReportTotals (probably), -/// ] -/// ``` -/// with `int` being normal and the other types being from our Python code: -/// - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) -/// - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45) -/// - [`SessionTotalsArray`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L263-L272) -/// -/// `SessionTotalsArray` no longer exists, but older reports may still have it. -/// It's a dict mapping a session ID to a `SessionTotals` (which is just a type -/// alias for `ReportTotals` and a "meta" key with extra information including -/// how many sessions there are in the map, and old reports may still have it. -/// There's an even older format which is just a flat list. In any case, we -/// ignore the field now. -/// -/// Input example: -/// ```notrust -/// "src/report.rs": [ -/// 0, # index in chunks -/// [ # file totals -/// 0, # > files -/// 45, # > lines -/// 45, # > hits -/// 0, # > misses -/// 0, # > partials -/// "100", # > coverage % -/// 0, # > branches -/// 0, # > methods -/// 0, # > messages -/// 0, # > sessions -/// 0, # > complexity -/// 0, # > complexity_total -/// 0 # > diff -/// ], -/// { # session totals (usually null nowadays) -/// "0": [ # > key: session id -/// 0, # > files -/// 45, # > lines -/// 45, # > hits -/// 0, # > misses -/// 0, # > partials -/// "100" # > coverage -/// ], -/// "meta": { -/// "session_count": 1 -/// } -/// }, -/// null # diff totals -/// ], -/// ``` -pub fn report_file>( - buf: &mut ReportOutputStream, -) -> PResult<(usize, i64)> { - let (filename, file_summary) = delimited(ws, parse_kv, ws).parse_next(buf)?; - - let Some(chunks_index) = file_summary - .get(0) - // winnow's f64 parser handles scientific notation and such OOTB so we use it for all - // numbers. 
This is expected to be u64 - .and_then(JsonVal::as_f64) - .map(|f| f as u64) - else { - return Err(ErrMode::Cut(ContextError::new())); - }; - - let file = buf - .state - .report_builder - .insert_file(&filename) - .map_err(|e| ErrMode::from_external_error(buf, ErrorKind::Fail, e))?; - - Ok((chunks_index as usize, file.id)) +#[derive(Debug, Deserialize)] +struct ReportJson { + // NOTE: these two are `BTreeMap` only to have stable iteration order in tests + files: BTreeMap<String, File>, + sessions: BTreeMap<usize, Session>, } -/// Parses a key-value pair where the key is a session index and the value is an -/// encoded `Session`. A session essentially just an upload. We can compute -/// session-specific coverage totals on-demand later and only care about other -/// details for now. -/// -/// The format is messy and can only be fully understood by reading the Python -/// source in our `shared` repository's -/// [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py), -/// [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py), -/// and [`shared/utils/sessions.py`](https://github.com/codecov/shared/blob/main/shared/utils/sessions.py). -/// Nevertheless, the common case will be described here. -/// -/// At a high level, the input looks like: -/// ```notrust -/// "session index": [ -/// "t": ReportTotals, # Coverage totals for this report -/// "d": int, # time -/// "a": str, # archive (URL of raw upload) -/// "f": list[str], # flags -/// "c": str, # provider -/// "n": str, # build -/// "N": str, # name -/// "j": str, # CI job name -/// "u": str, # CI job run URL -/// "p": str, # state -/// "e": str, # env -/// "st": str, # session type -/// "se": dict, # session extras -/// ] -/// ``` -/// with most types being normal and others coming from our Python code: -/// - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45). 
-/// - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) -/// -/// Input example: -/// ```notrust -/// "0": { # session index -/// "t": [ # session totals -/// 3, # files in session -/// 94, # lines -/// 52, # hits -/// 42, # misses -/// 0, # partials -/// "55.31915", # coverage % -/// 0, # branches -/// 0, # methods -/// 0, # messages -/// 0, # sessions -/// 0, # complexity -/// 0, # complexity_total -/// 0 # diff -/// ], -/// "d": 1704827412, # timestamp -/// # archive (raw upload URL) -/// "a": "v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt", -/// "f": [], # flags -/// "c": null, # provider -/// "n": null, # build -/// "N": null, # name -/// "j": "codecov-rs CI", # CI job name -/// # CI job run URL -/// "u": "https://github.com/codecov/codecov-rs/actions/runs/7465738121", -/// "p": null, # state -/// "e": null, # env -/// "st": "uploaded", # session type -/// "se": {} # session extras -/// } -/// ``` -pub fn report_session<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<(usize, i64)> { - let (session_index, encoded_session) = delimited(ws, parse_kv, ws).parse_next(buf)?; - let Ok(session_index) = session_index.parse::<usize>() else { - return Err(ErrMode::Cut(ContextError::new())); - }; - let JsonVal::Object(values) = encoded_session else { - return Err(ErrMode::Cut(ContextError::new())); - }; - - let raw_upload = models::RawUpload { - timestamp: values.get("d").and_then(JsonVal::as_f64).map(|f| f as i64), - raw_upload_url: values.get("a").and_then(JsonVal::as_str).map(str::to_owned), - flags: values.get("f").cloned(), - provider: values.get("c").and_then(JsonVal::as_str).map(str::to_owned), - build: values.get("n").and_then(JsonVal::as_str).map(str::to_owned), - name: values.get("N").and_then(JsonVal::as_str).map(str::to_owned), - job_name: values.get("j").and_then(JsonVal::as_str).map(str::to_owned), - ci_run_url: values.get("u").and_then(JsonVal::as_str).map(str::to_owned), - state: values.get("p").and_then(JsonVal::as_str).map(str::to_owned), - env: values.get("e").and_then(JsonVal::as_str).map(str::to_owned), - session_type: values - .get("st") - .and_then(JsonVal::as_str) - .map(str::to_owned), - session_extras: values.get("se").cloned(), - ..Default::default() - }; - - let raw_upload = buf - .state - .report_builder - .insert_raw_upload(raw_upload) - .map_err(|e| ErrMode::from_external_error(buf, ErrorKind::Fail, e))?; - - Ok((session_index, raw_upload.id)) +#[derive(Debug, Deserialize)] +// this really is: +// - index in chunks +// - file totals +// - session totals +// - diff totals +struct File(usize, IgnoredAny, IgnoredAny, IgnoredAny); + +#[derive(Debug, Deserialize)] +struct Session { + #[serde(rename = "d")] + timestamp: Option<i64>, + #[serde(rename = "a")] + raw_upload_url: Option<String>, + #[serde(rename = "f")] + flags: Option<Value>, + #[serde(rename = "c")] + provider: Option<String>, + #[serde(rename = "n")] + build: Option<String>, + #[serde(rename = "N")] + name: Option<String>, + #[serde(rename = "j")] + job_name: Option<String>, + #[serde(rename = "u")] + ci_run_url: Option<String>, + #[serde(rename = "p")] + state: Option<String>, + #[serde(rename = "e")] + env: Option<String>, + #[serde(rename = "st")] + session_type: Option<String>, + #[serde(rename = "se")] + session_extras: Option<Value>, } -/// Parses the JSON object that corresponds to the "files" key. Because there -/// could be many files, we parse each key/value pair one at a time. 
-pub fn report_files_dict<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<HashMap<usize, i64>> { - cut_err(delimited( - (ws, '{', ws), - separated(0.., report_file, (ws, ',', ws)), - (ws, '}', ws), - )) - .parse_next(buf) +#[derive(Debug)] +pub struct ParsedReportJson { + pub files: HashMap<usize, i64>, + pub sessions: HashMap<usize, i64>, } -/// Parses the JSON object that corresponds to the "sessions" key. Because there -/// could be many sessions, we parse each key/value pair one at a time. -pub fn report_sessions_dict<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<HashMap<usize, i64>> { - cut_err(delimited( - (ws, '{', ws), - separated(0.., report_session, (ws, ',', ws)), - (ws, '}', ws), - )) - .parse_next(buf) -} +pub fn parse_report_json<B, R>( + input: &[u8], + builder: &mut B, +) -> Result<ParsedReportJson, CodecovError> +where + B: ReportBuilder<R>, + R: Report, +{ + let report: ReportJson = serde_json::from_slice(input)?; + + let mut files = HashMap::with_capacity(report.files.len()); + for (filename, file) in report.files { + let chunk_index = file.0; + + let file = builder.insert_file(&filename)?; + files.insert(chunk_index, file.id); + } -/// Parses a "report JSON" object which contains information about the files and -/// "sessions" in a report. A session is more-or-less a single upload, and they -/// are represented in our schema as a "context" which may be tied to a line. -/// -/// At a high level, the format looks something like: -/// ```notrust -/// { -/// "files": { -/// "filename": ReportFileSummary, -/// ... -/// }, -/// "sessions": { -/// "session index": Session, -/// ... -/// } -/// } -/// ``` -/// -/// The types can only be completely understood by reading their implementations -/// in our Python code: -/// - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) -/// - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) -pub fn parse_report_json<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<(HashMap<usize, i64>, HashMap<usize, i64>)> { - let parse_files = delimited(specific_key("files"), report_files_dict, (ws, ',', ws)); - let parse_sessions = delimited(specific_key("sessions"), report_sessions_dict, ws); - cut_err(delimited( - (ws, '{', ws), - (parse_files, parse_sessions), - (ws, '}', ws), - )) - .parse_next(buf) + let mut sessions = HashMap::with_capacity(report.sessions.len()); + for (session_index, session) in report.sessions { + let raw_upload = models::RawUpload { + id: 0, + timestamp: session.timestamp, + raw_upload_url: session.raw_upload_url, + flags: session.flags, + provider: session.provider, + build: session.build, + name: session.name, + job_name: session.job_name, + ci_run_url: session.ci_run_url, + state: session.state, + env: session.env, + session_type: session.session_type, + session_extras: session.session_extras, + }; + + let raw_upload = builder.insert_raw_upload(raw_upload)?; + + sessions.insert(session_index, raw_upload.id); + } + + Ok(ParsedReportJson { files, sessions }) } #[cfg(test)] mod tests { use super::*; - use crate::report::test::{TestReport, TestReportBuilder}; + use crate::report::test::TestReportBuilder; + + #[test] + fn test_report_json_simple_valid_case() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#; - type TestStream<'a> = ReportOutputStream<&'a str, TestReport, TestReportBuilder>; + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); - struct Ctx { 
- parse_ctx: ReportBuilderCtx, + let report = report_builder.build().unwrap(); + assert_eq!(report.files, &[models::SourceFile::new("src/report.rs")]); + assert_eq!( + report.uploads, + &[models::RawUpload { + id: 0, + job_name: Some("codecov-rs CI".into()), + ..Default::default() + }] + ); } - fn hash_id(path: &str) -> i64 { - seahash::hash(path.as_bytes()) as i64 + #[test] + fn test_report_json_two_files_two_sessions() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!( + report.files, + &[ + models::SourceFile::new("src/report.rs"), + models::SourceFile::new("src/report/models.rs") + ] + ); + assert_eq!( + report.uploads, + &[ + models::RawUpload { + id: 0, + job_name: Some("codecov-rs CI".into()), + ..Default::default() + }, + models::RawUpload { + id: 1, + job_name: Some("codecov-rs CI 2".into()), + ..Default::default() + }, + ] + ); } - fn setup() -> Ctx { - let report_builder = TestReportBuilder::default(); - let parse_ctx = ReportBuilderCtx::new(report_builder); - Ctx { parse_ctx } + #[test] + fn test_report_json_empty_files() { + let input = br#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!(report.files, &[]); + assert_eq!( + report.uploads, + &[ + models::RawUpload { + id: 0, + job_name: Some("codecov-rs CI".into()), + ..Default::default() + }, + models::RawUpload { + id: 1, + job_name: Some("codecov-rs CI 2".into()), + ..Default::default() + }, + ] + ); } - mod report_json { - use serde_json::json; - - use super::*; - use crate::parsers::json::JsonMap; - - fn test_report_file(path: &str, input: &str) -> PResult<(usize, i64)> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = report_file.parse_next(&mut buf); - if res.is_ok() { - assert_eq!( - buf.state.report_builder.build().unwrap().files, - &[models::SourceFile::new(path)] - ); - } - res - } - - #[test] - fn test_report_file_simple_valid_case() { - assert_eq!( - test_report_file("src/report.rs", "\"src/report.rs\": [0, [], {}, null]",), - Ok((0, hash_id("src/report.rs"))) - ); - } - - #[test] - fn test_report_file_malformed_key() { - assert_eq!( - test_report_file("src/report.rs", "src/report.rs\": [0, [], {}, null]",), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_report_key_wrong_type() { - assert_eq!( - test_report_file("src/report.rs", "5: [0, [], {}, null]",), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_report_file_chunks_index_wrong_type() { - assert_eq!( - test_report_file("src/report.rs", "\"src/report.rs\": [\"0\", [], {}, null]",), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_file_file_summary_wrong_type() { - assert_eq!( - test_report_file( - "src/report.rs", - "\"src/report.rs\": {\"chunks_index\": 0, \"totals\": []}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_file_file_summary_empty() { - assert_eq!( - test_report_file("src/report.rs", "\"src/report.rs\": []",), - 
Err(ErrMode::Cut(ContextError::new())) - ); - } - - fn test_report_session(job_name: Option<&str>, input: &str) -> PResult<(usize, i64)> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = report_session.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - assert_eq!( - report.uploads, - &[models::RawUpload { - id: 0, - job_name: job_name.map(str::to_owned), - ..Default::default() - }] - ); - } - res - } - - #[test] - fn test_report_session_simple_valid_case() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "\"0\": {\"j\": \"codecov-rs CI\"}",), - Ok((0, 0)) - ); - } - - #[test] - fn test_report_session_fully_populated() { - let ctx = setup(); - let timestamp = 1704827412; - let job_name = "codecov-rs CI"; - let ci_run_url = "https://github.com/codecov/codecov-rs/actions/runs/7465738121"; - let input = "\"0\": { - \"t\": [3, 94, 52, 42, 3, \"55.31915\", 2, 2, 0, 0, 3, 5, 0], - \"d\": 1704827412, - \"a\": \"v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt\", - \"f\": [\"flag\"], - \"c\": \"github-actions\", - \"n\": \"build\", - \"N\": \"name\", - \"j\": \"codecov-rs CI\", - \"u\": \"https://github.com/codecov/codecov-rs/actions/runs/7465738121\", - \"p\": \"state\", - \"e\": \"env\", - \"st\": \"uploaded\", - \"se\": {} - }"; - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let inserted_upload = models::RawUpload { - id: 0, - timestamp: Some(timestamp), - raw_upload_url: Some( - "v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt" - .to_string(), - ), - flags: Some(json!(["flag"])), - provider: Some("github-actions".to_string()), - build: Some("build".to_string()), - name: Some("name".to_string()), - job_name: Some(job_name.to_string()), - ci_run_url: Some(ci_run_url.to_string()), - state: Some("state".to_string()), - env: Some("env".to_string()), - session_type: Some("uploaded".to_string()), - session_extras: Some(JsonVal::Object(JsonMap::new())), - }; - - assert_eq!(report_session.parse_next(&mut buf), Ok((0, 0))); - - let report = buf.state.report_builder.build().unwrap(); - assert_eq!(report.uploads, &[inserted_upload]); - } - - #[test] - fn test_report_session_malformed_session_index() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "'0\": {\"j\": \"codecov-rs CI\"}",), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_report_session_session_index_not_numeric() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "\"str\": {\"j\": \"codecov-rs CI\"}",), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_session_session_index_float() { - assert_eq!( - test_report_session( - Some("codecov-rs CI"), - "\"3.34\": {\"j\": \"codecov-rs CI\"}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_session_missing_job_key() { - assert_eq!( - test_report_session(None, "\"0\": {\"x\": \"codecov-rs CI\"}",), - Ok((0, 0)) - ); - } - - #[test] - fn test_report_session_job_key_wrong_type() { - assert_eq!(test_report_session(None, "\"0\": {\"j\": []}",), Ok((0, 0))); - } - - #[test] - fn test_report_session_encoded_session_wrong_type() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "\"0\": [\"j\", []]",), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - fn test_report_files_dict(paths: &[&str], input: &str) -> PResult> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = 
report_files_dict.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - - let expected_files: Vec<_> = paths - .iter() - .map(|path| models::SourceFile::new(path)) - .collect(); - assert_eq!(report.files, expected_files); - } - res - } - - #[test] - fn test_report_files_dict_single_valid_file() { - assert_eq!( - test_report_files_dict( - &["src/report.rs"], - "{\"src/report.rs\": [0, [], {}, null]}", - ), - Ok(HashMap::from([(0, hash_id("src/report.rs"))])) - ); - } - - #[test] - fn test_report_files_dict_multiple_valid_files() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [1, [], {}, null]}", - ), Ok(HashMap::from([(0, hash_id("src/report.rs")), (1, hash_id("src/report/models.rs"))]))); - } - - #[test] - fn test_report_files_dict_multiple_valid_files_trailing_comma() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [1, [], {}, null],}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_files_dict_multiple_files_same_index() { - // TODO this is how winnow handles accumulating into collections but it's not - // the behavior that we want. we want to error - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [0, [], {}, null]}", - ), Ok(HashMap::from([(0, hash_id("src/report/models.rs"))]))); - } - - #[test] - fn test_report_files_dict_single_invalid_file() { - assert_eq!( - test_report_files_dict( - &["src/report.rs"], - "{\"src/report.rs\": [null, [], {}, null]}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_files_dict_invalid_file_after_valid_file() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [null, [], {}, null]}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_files_dict_wrong_type() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "[\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [1, [], {}, null]]", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_files_dict_no_files() { - assert_eq!(test_report_files_dict(&[], "{}",), Ok(HashMap::new())); - } - - // This helper is for sessions that include "j" but not "d" or "u". - // Name-building behavior is tested separately + covered in the - // `fully_populated` test case. 
- fn test_report_sessions_dict( - jobs: &[Option<&str>], - input: &str, - ) -> PResult> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = report_sessions_dict.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - - let expected_uploads: Vec<_> = jobs - .iter() - .enumerate() - .map(|(i, name)| models::RawUpload { - id: i as i64, - job_name: name.map(str::to_owned), - ..Default::default() - }) - .collect(); - assert_eq!(report.uploads, expected_uploads); - } - res - } - - #[test] - fn test_report_sessions_dict_single_valid_session() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI")], - "{\"0\": {\"j\": \"codecov-rs CI\"}}", - ), - Ok(HashMap::from([(0, 0)])) - ); - } - - #[test] - fn test_report_sessions_dict_multiple_valid_sessions() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), Some("codecov-rs CI 2")], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}", - ), - Ok(HashMap::from([(0, 0), (1, 1)])) - ); - } - - #[test] - fn test_report_sessions_dict_multiple_valid_sessions_trailing_comma() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), Some("codecov-rs CI 2")], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"},}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_sessions_dict_multiple_sessions_same_index() { - // TODO this is how winnow handles accumulating into collections but it's not - // the behavior that we want. we want to error - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), Some("codecov-rs CI 2")], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"0\": {\"j\": \"codecov-rs CI 2\"}}", - ), - Ok(HashMap::from([(0, 1)])) - ); - } - - #[test] - fn test_report_sessions_dict_single_malformed_session() { - assert_eq!( - test_report_sessions_dict(&[None], "{\"0\": {\"xj\": \"codecov-rs CI\"}}",), - Ok(HashMap::from([(0, 0)])) - ); - } - - #[test] - fn test_report_sessions_dict_invalid_session_after_valid_session() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), None], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"xj\": \"codecov-rs CI 2\"}}", - ), - Ok(HashMap::from([(0, 0), (1, 1)])) - ); - } - - #[test] - fn test_report_sessions_dict_wrong_type() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI")], - "{\"0\": [\"j\": \"codecov-rs CI\"}]", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_sessions_dict_no_sessions() { - assert_eq!(test_report_sessions_dict(&[], "{}",), Ok(HashMap::new())); - } - - fn test_report_json( - paths: &[&str], - jobs: &[&str], - input: &str, - ) -> PResult<(HashMap, HashMap)> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = parse_report_json.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - - let expected_files: Vec<_> = paths - .iter() - .map(|path| models::SourceFile::new(path)) - .collect(); - assert_eq!(report.files, expected_files); - - let expected_uploads: Vec<_> = jobs - .iter() - .enumerate() - .map(|(i, name)| models::RawUpload { - id: i as i64, - job_name: Some(name.to_string()), - ..Default::default() - }) - .collect(); - assert_eq!(report.uploads, expected_uploads); - } - res - } - - #[test] - fn test_report_json_simple_valid_case() { - assert_eq!(test_report_json( - &["src/report.rs"], - 
&["codecov-rs CI"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}}}", - ), Ok((HashMap::from([(0, hash_id("src/report.rs"))]), HashMap::from([(0, 0)])))) - } - - #[test] - fn test_report_json_two_files_two_sessions() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Ok((HashMap::from([(0, hash_id("src/report.rs")), (1, hash_id("src/report/models.rs"))]), HashMap::from([(0, 0), (1, 1)])))); - } - - #[test] - fn test_report_json_empty_files() { - assert_eq!(test_report_json( - &[], - &["codecov-rs CI","codecov-rs CI 2"], - "{\"files\": {}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Ok((HashMap::new(), HashMap::from([(0, 0), (1, 1)])))); - } - - #[test] - fn test_report_json_empty_sessions() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &[], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}, \"sessions\": {}}", - ), Ok((HashMap::from([(0, hash_id("src/report.rs")), (1, hash_id("src/report/models.rs"))]), HashMap::new()))); - } - - #[test] - fn test_report_json_empty() { - assert_eq!( - test_report_json(&[], &[], "{\"files\": {}, \"sessions\": {}}",), - Ok((HashMap::new(), HashMap::new())) - ); - } - - #[test] - fn test_report_json_sessions_before_files() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}, \"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_missing_files() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI","codecov-rs CI 2"], - "{\"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_missing_sessions() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_one_invalid_file() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [null, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_one_invalid_session() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"j\": {\"xj\": \"codecov-rs CI 2\"}}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } + #[test] + fn test_report_json_empty_sessions() { + let input = br#"{"files": 
{"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!( + report.files, + &[ + models::SourceFile::new("src/report.rs"), + models::SourceFile::new("src/report/models.rs") + ] + ); + assert_eq!(report.uploads, &[]); + } + + #[test] + fn test_report_json_empty() { + let input = br#"{"files": {}, "sessions": {}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!(report.files, &[]); + assert_eq!(report.uploads, &[]); + } + + #[test] + fn test_report_json_missing_files() { + let input = + br#"{"sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); + } + + #[test] + fn test_report_json_missing_sessions() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); + } + + #[test] + fn test_report_json_one_invalid_file() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [null, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); + } + + #[test] + fn test_report_json_one_invalid_session() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "j": {"xj": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); } } diff --git a/core/src/report/mod.rs b/core/src/report/mod.rs index f502a4d..1d924a6 100644 --- a/core/src/report/mod.rs +++ b/core/src/report/mod.rs @@ -6,7 +6,7 @@ pub use sqlite::{SqliteReport, SqliteReportBuilder, SqliteReportBuilderTx}; #[cfg(feature = "pyreport")] pub mod pyreport; -#[cfg(test)] +#[cfg(any(test, feature = "testing"))] pub mod test; use crate::error::Result; diff --git a/core/tests/test_pyreport_shim.rs b/core/tests/test_pyreport_shim.rs index d0b7f0a..58d5730 100644 --- a/core/tests/test_pyreport_shim.rs +++ b/core/tests/test_pyreport_shim.rs @@ -6,10 +6,9 @@ use std::{ }; use codecov_rs::{ - parsers::{ - common::ReportBuilderCtx, - pyreport, - pyreport::{chunks, report_json}, + parsers::pyreport::{ + self, chunks, + report_json::{self, ParsedReportJson}, }, report::{ models, pyreport::ToPyreport, Report, ReportBuilder, SqliteReport, SqliteReportBuilder, @@ -22,8 +21,6 @@ use winnow::Parser; mod common; -type ReportJsonStream<'a> = - report_json::ReportOutputStream<&'a str, SqliteReport, SqliteReportBuilder>; type ChunksStream<'a> = chunks::ReportOutputStream<&'a str, SqliteReport, SqliteReportBuilder>; struct Ctx { @@ -45,15 +42,6 @@ fn test_parse_report_json() { let rng_seed = 5; let mut rng = StdRng::seed_from_u64(rng_seed); - let test_ctx = setup(); - let parse_ctx = ReportBuilderCtx::new( - SqliteReportBuilder::new_with_seed(test_ctx.db_file, rng_seed).unwrap(), - ); - let mut buf = 
ReportJsonStream { - input: &input, - state: parse_ctx, - }; - let expected_files = vec![ models::SourceFile::new("src/report.rs"), models::SourceFile::new("src/report/models.rs"), @@ -84,13 +72,19 @@ fn test_parse_report_json() { let expected_json_sessions = HashMap::from([(0, expected_session.id)]); - let (actual_files, actual_sessions) = report_json::parse_report_json - .parse_next(&mut buf) + let test_ctx = setup(); + let mut report_builder = + SqliteReportBuilder::new_with_seed(test_ctx.db_file, rng_seed).unwrap(); + + let ParsedReportJson { + files: actual_files, + sessions: actual_sessions, + } = report_json::parse_report_json(input.as_bytes(), &mut report_builder) .expect("Failed to parse"); assert_eq!(actual_files, expected_json_files); assert_eq!(actual_sessions, expected_json_sessions); - let report = buf.state.report_builder.build().unwrap(); + let report = report_builder.build().unwrap(); let files = report.list_files().unwrap(); assert_eq!(files, expected_files);