From 5f3091af9ceef4b5f7310bd862556f1b6706f96c Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 30 Aug 2024 11:11:24 +0200 Subject: [PATCH] Parse `report_json` files with serde (#18) Replaces the hand-written `winnow`-based parser with a bunch of struct definitions along with deriving `serde::Deserialize`. --- Cargo.lock | 54 +- Cargo.toml | 1 - core/Cargo.toml | 3 + core/benches/pyreport.rs | 179 +--- core/src/error.rs | 3 + core/src/parsers/pyreport/mod.rs | 17 +- core/src/parsers/pyreport/report_json.rs | 1190 +++++++--------------- core/src/report/mod.rs | 2 +- core/tests/test_pyreport_shim.rs | 30 +- 9 files changed, 454 insertions(+), 1025 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3742f4b..23de815 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,9 +40,12 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cc" -version = "1.1.8" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" +checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" +dependencies = [ + "shlex", +] [[package]] name = "cfg-if" @@ -52,18 +55,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.5.13" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" dependencies = [ "anstyle", "clap_lex", @@ -87,6 +90,7 @@ dependencies = [ "rusqlite", "rusqlite_migration", "seahash", + "serde", "serde_json", "tempfile", "thiserror", @@ -156,9 +160,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "getrandom" @@ -228,9 +232,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libsqlite3-sys" @@ -380,9 +384,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -450,9 +454,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "a85d50532239da68e9addb745ba38ff4612a242c1c7ceea689c4bc7c2f43c36f" dependencies = [ "bitflags", "errno", @@ -475,18 +479,18 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.205" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.205" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", @@ -495,9 +499,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" dependencies = [ "itoa", "memchr", @@ -505,6 +509,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.13.2" @@ -513,9 +523,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "syn" -version = "2.0.72" +version = "2.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index b29b84d..3c9f71c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,5 +5,4 @@ members = ["bindings", "core"] default-members = ["core"] [profile.release] - debug = 1 diff --git a/core/Cargo.toml b/core/Cargo.toml index bbbce34..f47fb6b 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [features] default = ["pyreport"] pyreport = [] +testing = [] [dependencies] include_dir = "0.7.3" @@ -14,6 +15,7 @@ rand = "0.8.5" rusqlite = { version = "0.31.0", features = ["bundled", "limits"] } rusqlite_migration = { version = "1.2.0", features = ["from-directory"] } seahash = "4.1.0" +serde = { version = "1.0.204", features = ["derive"] } serde_json = "1.0.117" thiserror = "1.0.59" winnow = "0.5.34" @@ -25,3 +27,4 @@ tempfile = "3.9.0" [[bench]] name = "pyreport" harness = false +required-features = ["testing"] diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs index c0687e6..b5ae90e 100644 --- a/core/benches/pyreport.rs +++ b/core/benches/pyreport.rs @@ -1,10 +1,5 @@ -use codecov_rs::{ - error::Result, - parsers::{common::ReportBuilderCtx, pyreport::report_json}, - report::{models, Report, ReportBuilder}, -}; +use codecov_rs::{parsers::pyreport::report_json, report::test::TestReportBuilder}; use divan::Bencher; -use winnow::Parser as _; fn main() { divan::main(); @@ -13,11 +8,11 @@ fn main() { #[divan::bench] fn simple_report() { let reports = &[ - r#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": 
{"j": "codecov-rs CI"}}}"#, - r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, - r#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#, - r#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#, - r#"{"files": {}, "sessions": {}}"#, + &br#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#[..], + &br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#[..], + &br#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#[..], + &br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#[..], + &br#"{"files": {}, "sessions": {}}"#[..], ]; for input in reports { @@ -26,17 +21,17 @@ fn simple_report() { } // parsing this is quite slow -#[divan::bench(sample_count = 10)] +#[divan::bench] fn complex_report(bencher: Bencher) { // this is a ~11M `report_json` let path = "./fixtures/pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json"; - let Ok(report) = std::fs::read_to_string(path) else { + let Ok(report) = std::fs::read(path) else { println!("Failed to read test report"); return; }; - if report.starts_with("version https://git-lfs.github.com/spec/v1\n") { + if report.starts_with(b"version https://git-lfs.github.com/spec/v1\n") { println!("Sample report has not been pulled from Git LFS"); return; } @@ -44,157 +39,7 @@ fn complex_report(bencher: Bencher) { bencher.bench(|| run_parsing(&report)); } -fn run_parsing(input: &str) { - let report_builder = TestReport::default(); - let mut stream = report_json::ReportOutputStream::<&str, TestReport, TestReport> { - input, - state: ReportBuilderCtx::new(report_builder), - }; - report_json::parse_report_json - .parse_next(&mut stream) - .unwrap(); -} - -#[derive(Debug, Default)] -struct TestReport { - files: Vec, - uploads: Vec, -} - -impl Report for TestReport { - fn list_files(&self) -> Result> { - todo!() - } - - fn list_contexts(&self) -> Result> { - todo!() - } - - fn list_coverage_samples(&self) -> Result> { - todo!() - } - - fn list_branches_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn get_method_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn list_spans_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn list_contexts_for_sample( - &self, - _sample: &models::CoverageSample, - ) -> Result> { - todo!() - } - - fn list_samples_for_file( - &self, - _file: &models::SourceFile, - ) -> Result> { - todo!() - } - - fn list_raw_uploads(&self) -> Result> { - todo!() - } - - fn merge(&mut self, _other: &Self) -> Result<()> { - todo!() - } - - fn totals(&self) -> Result { - todo!() - } -} - -impl ReportBuilder for TestReport { - fn insert_file(&mut self, path: &str) -> Result { - let file = models::SourceFile::new(path); - self.files.push(file.clone()); - Ok(file) - } - - fn insert_raw_upload( - &mut self, - mut upload_details: models::RawUpload, - ) -> Result { - upload_details.id = self.uploads.len() as i64; - self.uploads.push(upload_details.clone()); - Ok(upload_details) - } - - fn insert_context(&mut self, _name: &str) -> Result { - 
todo!() - } - - fn insert_coverage_sample( - &mut self, - _sample: models::CoverageSample, - ) -> Result { - todo!() - } - - fn multi_insert_coverage_sample( - &mut self, - _samples: Vec<&mut models::CoverageSample>, - ) -> Result<()> { - todo!() - } - - fn insert_branches_data( - &mut self, - _branch: models::BranchesData, - ) -> Result { - todo!() - } - - fn multi_insert_branches_data( - &mut self, - _branches: Vec<&mut models::BranchesData>, - ) -> Result<()> { - todo!() - } - - fn insert_method_data(&mut self, _method: models::MethodData) -> Result { - todo!() - } - - fn multi_insert_method_data(&mut self, _methods: Vec<&mut models::MethodData>) -> Result<()> { - todo!() - } - - fn insert_span_data(&mut self, _span: models::SpanData) -> Result { - todo!() - } - - fn multi_insert_span_data(&mut self, _spans: Vec<&mut models::SpanData>) -> Result<()> { - todo!() - } - - fn associate_context(&mut self, _assoc: models::ContextAssoc) -> Result { - todo!() - } - - fn multi_associate_context(&mut self, _assocs: Vec<&mut models::ContextAssoc>) -> Result<()> { - todo!() - } - - fn build(self) -> Result { - Ok(self) - } +fn run_parsing(input: &[u8]) { + let mut report_builder = TestReportBuilder::default(); + report_json::parse_report_json(input, &mut report_builder).unwrap(); } diff --git a/core/src/error.rs b/core/src/error.rs index 5a551e4..5f793bb 100644 --- a/core/src/error.rs +++ b/core/src/error.rs @@ -17,6 +17,9 @@ pub enum CodecovError { #[error("parser error: '{0}'")] ParserError(winnow::error::ContextError), + #[error("parser error: '{0}'")] + Json(#[from] serde_json::Error), + #[error("io error: '{0}'")] IOError(#[from] std::io::Error), diff --git a/core/src/parsers/pyreport/mod.rs b/core/src/parsers/pyreport/mod.rs index bf015b3..fb3a9a0 100644 --- a/core/src/parsers/pyreport/mod.rs +++ b/core/src/parsers/pyreport/mod.rs @@ -3,7 +3,6 @@ use std::{fs::File, path::PathBuf}; use memmap2::Mmap; use winnow::Parser; -use super::common::ReportBuilderCtx; use crate::{ error::{CodecovError, Result}, report::{ReportBuilder, SqliteReport, SqliteReportBuilder, SqliteReportBuilderTx}, @@ -72,27 +71,19 @@ fn parse_pyreport_with_builder( // and prevent us from consuming `report_builder` to actually build a // `SqliteReport`. { - let report_builder_tx = report_builder.transaction()?; + let mut report_builder_tx = report_builder.transaction()?; // Memory-map the input file so we don't have to read the whole thing into RAM let mmap_handle = unsafe { Mmap::map(report_json_file)? }; - let buf = unsafe { std::str::from_utf8_unchecked(&mmap_handle[..]) }; - let mut stream = - report_json::ReportOutputStream::<&str, SqliteReport, SqliteReportBuilderTx> { - input: buf, - state: ReportBuilderCtx::new(report_builder_tx), - }; - let (files, sessions) = report_json::parse_report_json - .parse_next(&mut stream) - .map_err(|e| e.into_inner().unwrap_or_default()) - .map_err(CodecovError::ParserError)?; + let report_json::ParsedReportJson { files, sessions } = + report_json::parse_report_json(&mmap_handle, &mut report_builder_tx)?; // Replace our mmap handle so the first one can be unmapped let mmap_handle = unsafe { Mmap::map(chunks_file)? 
}; let buf = unsafe { std::str::from_utf8_unchecked(&mmap_handle[..]) }; // Move `report_builder` from the report JSON's parse context to this one - let chunks_ctx = chunks::ParseCtx::new(stream.state.report_builder, files, sessions); + let chunks_ctx = chunks::ParseCtx::new(report_builder_tx, files, sessions); let mut chunks_stream = chunks::ReportOutputStream::<&str, SqliteReport, SqliteReportBuilderTx> { input: buf, diff --git a/core/src/parsers/pyreport/report_json.rs b/core/src/parsers/pyreport/report_json.rs index 27ed265..4a03070 100644 --- a/core/src/parsers/pyreport/report_json.rs +++ b/core/src/parsers/pyreport/report_json.rs @@ -1,827 +1,411 @@ -use std::collections::HashMap; - -use winnow::{ - combinator::{cut_err, delimited, separated}, - error::{ContextError, ErrMode, ErrorKind, FromExternalError}, - PResult, Parser, Stateful, +//! Parses a "report JSON" object which contains information about the files and +//! "sessions" in a report. A session is more-or-less a single upload, and they +//! are represented in our schema as a "context" which may be tied to a line. +//! +//! At a high level, the format looks something like: +//! ```json +//! { +//! "files": { +//! "filename": ReportFileSummary, +//! ... +//! }, +//! "sessions": { +//! "session index": Session, +//! ... +//! } +//! } +//! ``` +//! +//! The types can only be completely understood by reading their implementations +//! in our Python code: +//! - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) +//! - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) +//! +//! ## Files +//! +//! The `files` are key-value pairs where the key is a filename and the value is +//! a `ReportFileSummary`. We primarily care about the chunks_index field and +//! can compute the totals on-demand later. +//! +//! The format is messy and can only be fully understood by reading the Python +//! source in our `shared` repository's +//! [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py) and +//! [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py). +//! Nevertheless, the common case will be described here. +//! +//! At a high level, the input looks like: +//! ```notrust +//! "filename.rs": [ +//! chunks_index: int, +//! file_totals: ReportTotals, +//! session_totals: null, // (formerly SessionTotalsArray, but ignored now) +//! diff_totals: ReportTotals (probably), +//! ] +//! ``` +//! with `int` being normal and the other types being from our Python code: +//! - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) +//! - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45) +//! - [`SessionTotalsArray`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L263-L272) +//! +//! `SessionTotalsArray` no longer exists, but older reports may still have it. +//! It's a dict mapping a session ID to a `SessionTotals` (which is just a type +//! alias for `ReportTotals`), plus a "meta" key with extra information such as +//! how many sessions there are in the map. +//! There's an even older format which is just a flat list. In any case, we 
ignore the field now. +//! +//! Input example: +//! ```json +//! "src/report.rs": [ +//! 0, # index in chunks +//! [ # file totals +//! 0, # > files +//! 45, # > lines +//! 45, # > hits +//! 0, # > misses +//! 0, # > partials +//! "100", # > coverage % +//! 0, # > branches +//! 0, # > methods +//! 0, # > messages +//! 0, # > sessions +//! 0, # > complexity +//! 0, # > complexity_total +//! 0 # > diff +//! ], +//! { # session totals (usually null nowadays) +//! "0": [ # > key: session id +//! 0, # > files +//! 45, # > lines +//! 45, # > hits +//! 0, # > misses +//! 0, # > partials +//! "100" # > coverage +//! ], +//! "meta": { +//! "session_count": 1 +//! } +//! }, +//! null # diff totals +//! ], +//! ``` +//! +//! ## Sessions +//! +//! The `sessions` are key-value pairs where the key is a session index and the +//! value is an encoded `Session`. A session is essentially just an upload. We can +//! compute session-specific coverage totals on-demand later and only care about +//! other details for now. +//! +//! The format is messy and can only be fully understood by reading the Python +//! source in our `shared` repository's +//! [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py), +//! [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py), +//! and [`shared/utils/sessions.py`](https://github.com/codecov/shared/blob/main/shared/utils/sessions.py). +//! Nevertheless, the common case will be described here. +//! +//! At a high level, the input looks like: +//! ```notrust +//! "session index": [ +//! "t": ReportTotals, # Coverage totals for this report +//! "d": int, # time +//! "a": str, # archive (URL of raw upload) +//! "f": list[str], # flags +//! "c": str, # provider +//! "n": str, # build +//! "N": str, # name +//! "j": str, # CI job name +//! "u": str, # CI job run URL +//! "p": str, # state +//! "e": str, # env +//! "st": str, # session type +//! "se": dict, # session extras +//! ] +//! ``` +//! with most types being normal and others coming from our Python code: +//! - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45). +//! - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) +//! +//! Input example: +//! ```notrust +//! "0": { # session index +//! "t": [ # session totals +//! 3, # files in session +//! 94, # lines +//! 52, # hits +//! 42, # misses +//! 0, # partials +//! "55.31915", # coverage % +//! 0, # branches +//! 0, # methods +//! 0, # messages +//! 0, # sessions +//! 0, # complexity +//! 0, # complexity_total +//! 0 # diff +//! ], +//! "d": 1704827412, # timestamp +//! # archive (raw upload URL) +//! "a": "v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt", +//! "f": [], # flags +//! "c": null, # provider +//! "n": null, # build +//! "N": null, # name +//! "j": "codecov-rs CI", # CI job name +//! # CI job run URL +//! "u": "https://github.com/codecov/codecov-rs/actions/runs/7465738121", +//! "p": null, # state +//! "e": null, # env +//! "st": "uploaded", # session type +//! "se": {} # session extras +//! } +//! 
``` + +use std::collections::{BTreeMap, HashMap}; + +use serde::{de::IgnoredAny, Deserialize}; +use serde_json::Value; + +use crate::{ + error::CodecovError, + report::{models, Report, ReportBuilder}, }; -use super::super::{ - common::{ - winnow::{ws, StrStream}, - ReportBuilderCtx, - }, - json::{parse_kv, specific_key, JsonVal}, -}; -use crate::report::{models, Report, ReportBuilder}; - -pub type ReportOutputStream = Stateful>; - -/// Parses a key-value pair where the key is a filename and the value is a -/// `ReportFileSummary`. We primarily care about the chunks_index field and can -/// compute the totals on-demand later. -/// -/// The format is messy and can only be fully understood by reading the Python -/// source in our `shared` repository's -/// [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py) and -/// [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py). -/// Nevertheless, the common case will be described here. -/// -/// At a high level, the input looks like: -/// ```notrust -/// "filename.rs": [ -/// chunks_index: int, -/// file_totals: ReportTotals, -/// session_totals: null, // (formerly SessionTotalsArray, but ignored now) -/// diff_totals: ReportTotals (probably), -/// ] -/// ``` -/// with `int` being normal and the other types being from our Python code: -/// - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) -/// - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45) -/// - [`SessionTotalsArray`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L263-L272) -/// -/// `SessionTotalsArray` no longer exists, but older reports may still have it. -/// It's a dict mapping a session ID to a `SessionTotals` (which is just a type -/// alias for `ReportTotals` and a "meta" key with extra information including -/// how many sessions there are in the map, and old reports may still have it. -/// There's an even older format which is just a flat list. In any case, we -/// ignore the field now. -/// -/// Input example: -/// ```notrust -/// "src/report.rs": [ -/// 0, # index in chunks -/// [ # file totals -/// 0, # > files -/// 45, # > lines -/// 45, # > hits -/// 0, # > misses -/// 0, # > partials -/// "100", # > coverage % -/// 0, # > branches -/// 0, # > methods -/// 0, # > messages -/// 0, # > sessions -/// 0, # > complexity -/// 0, # > complexity_total -/// 0 # > diff -/// ], -/// { # session totals (usually null nowadays) -/// "0": [ # > key: session id -/// 0, # > files -/// 45, # > lines -/// 45, # > hits -/// 0, # > misses -/// 0, # > partials -/// "100" # > coverage -/// ], -/// "meta": { -/// "session_count": 1 -/// } -/// }, -/// null # diff totals -/// ], -/// ``` -pub fn report_file>( - buf: &mut ReportOutputStream, -) -> PResult<(usize, i64)> { - let (filename, file_summary) = delimited(ws, parse_kv, ws).parse_next(buf)?; - - let Some(chunks_index) = file_summary - .get(0) - // winnow's f64 parser handles scientific notation and such OOTB so we use it for all - // numbers. 
This is expected to be u64 - .and_then(JsonVal::as_f64) - .map(|f| f as u64) - else { - return Err(ErrMode::Cut(ContextError::new())); - }; - - let file = buf - .state - .report_builder - .insert_file(&filename) - .map_err(|e| ErrMode::from_external_error(buf, ErrorKind::Fail, e))?; - - Ok((chunks_index as usize, file.id)) +#[derive(Debug, Deserialize)] +struct ReportJson { + // NOTE: these two are `BTreeMap` only to have stable iteration order in tests + files: BTreeMap<String, File>, + sessions: BTreeMap<usize, Session>, } -/// Parses a key-value pair where the key is a session index and the value is an -/// encoded `Session`. A session essentially just an upload. We can compute -/// session-specific coverage totals on-demand later and only care about other -/// details for now. -/// -/// The format is messy and can only be fully understood by reading the Python -/// source in our `shared` repository's -/// [`shared/reports/resources.py`](https://github.com/codecov/shared/tree/main/shared/reports/resources.py), -/// [`shared/reports/types.py`](https://github.com/codecov/shared/blob/main/shared/reports/types.py), -/// and [`shared/utils/sessions.py`](https://github.com/codecov/shared/blob/main/shared/utils/sessions.py). -/// Nevertheless, the common case will be described here. -/// -/// At a high level, the input looks like: -/// ```notrust -/// "session index": [ -/// "t": ReportTotals, # Coverage totals for this report -/// "d": int, # time -/// "a": str, # archive (URL of raw upload) -/// "f": list[str], # flags -/// "c": str, # provider -/// "n": str, # build -/// "N": str, # name -/// "j": str, # CI job name -/// "u": str, # CI job run URL -/// "p": str, # state -/// "e": str, # env -/// "st": str, # session type -/// "se": dict, # session extras -/// ] -/// ``` -/// with most types being normal and others coming from our Python code: -/// - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45). 
-/// - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) -/// -/// Input example: -/// ```notrust -/// "0": { # session index -/// "t": [ # session totals -/// 3, # files in session -/// 94, # lines -/// 52, # hits -/// 42, # misses -/// 0, # partials -/// "55.31915", # coverage % -/// 0, # branches -/// 0, # methods -/// 0, # messages -/// 0, # sessions -/// 0, # complexity -/// 0, # complexity_total -/// 0 # diff -/// ], -/// "d": 1704827412, # timestamp -/// # archive (raw upload URL) -/// "a": "v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt", -/// "f": [], # flags -/// "c": null, # provider -/// "n": null, # build -/// "N": null, # name -/// "j": "codecov-rs CI", # CI job name -/// # CI job run URL -/// "u": "https://github.com/codecov/codecov-rs/actions/runs/7465738121", -/// "p": null, # state -/// "e": null, # env -/// "st": "uploaded", # session type -/// "se": {} # session extras -/// } -/// ``` -pub fn report_session<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<(usize, i64)> { - let (session_index, encoded_session) = delimited(ws, parse_kv, ws).parse_next(buf)?; - let Ok(session_index) = session_index.parse::<usize>() else { - return Err(ErrMode::Cut(ContextError::new())); - }; - let JsonVal::Object(values) = encoded_session else { - return Err(ErrMode::Cut(ContextError::new())); - }; - - let raw_upload = models::RawUpload { - timestamp: values.get("d").and_then(JsonVal::as_f64).map(|f| f as i64), - raw_upload_url: values.get("a").and_then(JsonVal::as_str).map(str::to_owned), - flags: values.get("f").cloned(), - provider: values.get("c").and_then(JsonVal::as_str).map(str::to_owned), - build: values.get("n").and_then(JsonVal::as_str).map(str::to_owned), - name: values.get("N").and_then(JsonVal::as_str).map(str::to_owned), - job_name: values.get("j").and_then(JsonVal::as_str).map(str::to_owned), - ci_run_url: values.get("u").and_then(JsonVal::as_str).map(str::to_owned), - state: values.get("p").and_then(JsonVal::as_str).map(str::to_owned), - env: values.get("e").and_then(JsonVal::as_str).map(str::to_owned), - session_type: values - .get("st") - .and_then(JsonVal::as_str) - .map(str::to_owned), - session_extras: values.get("se").cloned(), - ..Default::default() - }; - - let raw_upload = buf - .state - .report_builder - .insert_raw_upload(raw_upload) - .map_err(|e| ErrMode::from_external_error(buf, ErrorKind::Fail, e))?; - - Ok((session_index, raw_upload.id)) +#[derive(Debug, Deserialize)] +// this really is: +// - index in chunks +// - file totals +// - session totals +// - diff totals +struct File(usize, IgnoredAny, IgnoredAny, IgnoredAny); + +#[derive(Debug, Deserialize)] +struct Session { + #[serde(rename = "d")] + timestamp: Option<i64>, + #[serde(rename = "a")] + raw_upload_url: Option<String>, + #[serde(rename = "f")] + flags: Option<Value>, + #[serde(rename = "c")] + provider: Option<String>, + #[serde(rename = "n")] + build: Option<String>, + #[serde(rename = "N")] + name: Option<String>, + #[serde(rename = "j")] + job_name: Option<String>, + #[serde(rename = "u")] + ci_run_url: Option<String>, + #[serde(rename = "p")] + state: Option<String>, + #[serde(rename = "e")] + env: Option<String>, + #[serde(rename = "st")] + session_type: Option<String>, + #[serde(rename = "se")] + session_extras: Option<Value>, } -/// Parses the JSON object that corresponds to the "files" key. Because there -/// could be many files, we parse each key/value pair one at a time. 
-pub fn report_files_dict<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<HashMap<usize, i64>> { - cut_err(delimited( - (ws, '{', ws), - separated(0.., report_file, (ws, ',', ws)), - (ws, '}', ws), - )) - .parse_next(buf) +#[derive(Debug)] +pub struct ParsedReportJson { + pub files: HashMap<usize, i64>, + pub sessions: HashMap<usize, i64>, } -/// Parses the JSON object that corresponds to the "sessions" key. Because there -/// could be many sessions, we parse each key/value pair one at a time. -pub fn report_sessions_dict<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<HashMap<usize, i64>> { - cut_err(delimited( - (ws, '{', ws), - separated(0.., report_session, (ws, ',', ws)), - (ws, '}', ws), - )) - .parse_next(buf) -} +pub fn parse_report_json<B, R>( + input: &[u8], + builder: &mut B, +) -> Result<ParsedReportJson, CodecovError> +where + B: ReportBuilder<R>, + R: Report, +{ + let report: ReportJson = serde_json::from_slice(input)?; + + let mut files = HashMap::with_capacity(report.files.len()); + for (filename, file) in report.files { + let chunk_index = file.0; + + let file = builder.insert_file(&filename)?; + files.insert(chunk_index, file.id); + } -/// Parses a "report JSON" object which contains information about the files and -/// "sessions" in a report. A session is more-or-less a single upload, and they -/// are represented in our schema as a "context" which may be tied to a line. -/// -/// At a high level, the format looks something like: -/// ```notrust -/// { -/// "files": { -/// "filename": ReportFileSummary, -/// ... -/// }, -/// "sessions": { -/// "session index": Session, -/// ... -/// } -/// } -/// ``` -/// -/// The types can only be completely understood by reading their implementations -/// in our Python code: -/// - [`ReportFileSummary`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L361-L367) -/// - [`Session`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/utils/sessions.py#L111-L128O) -pub fn parse_report_json<S: StrStream, R: Report, B: ReportBuilder<R>>( - buf: &mut ReportOutputStream<S, R, B>, -) -> PResult<(HashMap<usize, i64>, HashMap<usize, i64>)> { - let parse_files = delimited(specific_key("files"), report_files_dict, (ws, ',', ws)); - let parse_sessions = delimited(specific_key("sessions"), report_sessions_dict, ws); - cut_err(delimited( - (ws, '{', ws), - (parse_files, parse_sessions), - (ws, '}', ws), - )) - .parse_next(buf) + let mut sessions = HashMap::with_capacity(report.sessions.len()); + for (session_index, session) in report.sessions { + let raw_upload = models::RawUpload { + id: 0, + timestamp: session.timestamp, + raw_upload_url: session.raw_upload_url, + flags: session.flags, + provider: session.provider, + build: session.build, + name: session.name, + job_name: session.job_name, + ci_run_url: session.ci_run_url, + state: session.state, + env: session.env, + session_type: session.session_type, + session_extras: session.session_extras, + }; + + let raw_upload = builder.insert_raw_upload(raw_upload)?; + + sessions.insert(session_index, raw_upload.id); + } + + Ok(ParsedReportJson { files, sessions }) } #[cfg(test)] mod tests { use super::*; - use crate::report::test::{TestReport, TestReportBuilder}; + use crate::report::test::TestReportBuilder; + + #[test] + fn test_report_json_simple_valid_case() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}}}"#; - type TestStream<'a> = ReportOutputStream<&'a str, TestReport, TestReportBuilder>; + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); - struct Ctx { 
- parse_ctx: ReportBuilderCtx, + let report = report_builder.build().unwrap(); + assert_eq!(report.files, &[models::SourceFile::new("src/report.rs")]); + assert_eq!( + report.uploads, + &[models::RawUpload { + id: 0, + job_name: Some("codecov-rs CI".into()), + ..Default::default() + }] + ); } - fn hash_id(path: &str) -> i64 { - seahash::hash(path.as_bytes()) as i64 + #[test] + fn test_report_json_two_files_two_sessions() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!( + report.files, + &[ + models::SourceFile::new("src/report.rs"), + models::SourceFile::new("src/report/models.rs") + ] + ); + assert_eq!( + report.uploads, + &[ + models::RawUpload { + id: 0, + job_name: Some("codecov-rs CI".into()), + ..Default::default() + }, + models::RawUpload { + id: 1, + job_name: Some("codecov-rs CI 2".into()), + ..Default::default() + }, + ] + ); } - fn setup() -> Ctx { - let report_builder = TestReportBuilder::default(); - let parse_ctx = ReportBuilderCtx::new(report_builder); - Ctx { parse_ctx } + #[test] + fn test_report_json_empty_files() { + let input = br#"{"files": {}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!(report.files, &[]); + assert_eq!( + report.uploads, + &[ + models::RawUpload { + id: 0, + job_name: Some("codecov-rs CI".into()), + ..Default::default() + }, + models::RawUpload { + id: 1, + job_name: Some("codecov-rs CI 2".into()), + ..Default::default() + }, + ] + ); } - mod report_json { - use serde_json::json; - - use super::*; - use crate::parsers::json::JsonMap; - - fn test_report_file(path: &str, input: &str) -> PResult<(usize, i64)> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = report_file.parse_next(&mut buf); - if res.is_ok() { - assert_eq!( - buf.state.report_builder.build().unwrap().files, - &[models::SourceFile::new(path)] - ); - } - res - } - - #[test] - fn test_report_file_simple_valid_case() { - assert_eq!( - test_report_file("src/report.rs", "\"src/report.rs\": [0, [], {}, null]",), - Ok((0, hash_id("src/report.rs"))) - ); - } - - #[test] - fn test_report_file_malformed_key() { - assert_eq!( - test_report_file("src/report.rs", "src/report.rs\": [0, [], {}, null]",), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_report_key_wrong_type() { - assert_eq!( - test_report_file("src/report.rs", "5: [0, [], {}, null]",), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_report_file_chunks_index_wrong_type() { - assert_eq!( - test_report_file("src/report.rs", "\"src/report.rs\": [\"0\", [], {}, null]",), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_file_file_summary_wrong_type() { - assert_eq!( - test_report_file( - "src/report.rs", - "\"src/report.rs\": {\"chunks_index\": 0, \"totals\": []}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_file_file_summary_empty() { - assert_eq!( - test_report_file("src/report.rs", "\"src/report.rs\": []",), - 
Err(ErrMode::Cut(ContextError::new())) - ); - } - - fn test_report_session(job_name: Option<&str>, input: &str) -> PResult<(usize, i64)> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = report_session.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - assert_eq!( - report.uploads, - &[models::RawUpload { - id: 0, - job_name: job_name.map(str::to_owned), - ..Default::default() - }] - ); - } - res - } - - #[test] - fn test_report_session_simple_valid_case() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "\"0\": {\"j\": \"codecov-rs CI\"}",), - Ok((0, 0)) - ); - } - - #[test] - fn test_report_session_fully_populated() { - let ctx = setup(); - let timestamp = 1704827412; - let job_name = "codecov-rs CI"; - let ci_run_url = "https://github.com/codecov/codecov-rs/actions/runs/7465738121"; - let input = "\"0\": { - \"t\": [3, 94, 52, 42, 3, \"55.31915\", 2, 2, 0, 0, 3, 5, 0], - \"d\": 1704827412, - \"a\": \"v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt\", - \"f\": [\"flag\"], - \"c\": \"github-actions\", - \"n\": \"build\", - \"N\": \"name\", - \"j\": \"codecov-rs CI\", - \"u\": \"https://github.com/codecov/codecov-rs/actions/runs/7465738121\", - \"p\": \"state\", - \"e\": \"env\", - \"st\": \"uploaded\", - \"se\": {} - }"; - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let inserted_upload = models::RawUpload { - id: 0, - timestamp: Some(timestamp), - raw_upload_url: Some( - "v4/raw/2024-01-09////340c0c0b-a955-46a0-9de9-3a9b5f2e81e2.txt" - .to_string(), - ), - flags: Some(json!(["flag"])), - provider: Some("github-actions".to_string()), - build: Some("build".to_string()), - name: Some("name".to_string()), - job_name: Some(job_name.to_string()), - ci_run_url: Some(ci_run_url.to_string()), - state: Some("state".to_string()), - env: Some("env".to_string()), - session_type: Some("uploaded".to_string()), - session_extras: Some(JsonVal::Object(JsonMap::new())), - }; - - assert_eq!(report_session.parse_next(&mut buf), Ok((0, 0))); - - let report = buf.state.report_builder.build().unwrap(); - assert_eq!(report.uploads, &[inserted_upload]); - } - - #[test] - fn test_report_session_malformed_session_index() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "'0\": {\"j\": \"codecov-rs CI\"}",), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_report_session_session_index_not_numeric() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "\"str\": {\"j\": \"codecov-rs CI\"}",), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_session_session_index_float() { - assert_eq!( - test_report_session( - Some("codecov-rs CI"), - "\"3.34\": {\"j\": \"codecov-rs CI\"}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_session_missing_job_key() { - assert_eq!( - test_report_session(None, "\"0\": {\"x\": \"codecov-rs CI\"}",), - Ok((0, 0)) - ); - } - - #[test] - fn test_report_session_job_key_wrong_type() { - assert_eq!(test_report_session(None, "\"0\": {\"j\": []}",), Ok((0, 0))); - } - - #[test] - fn test_report_session_encoded_session_wrong_type() { - assert_eq!( - test_report_session(Some("codecov-rs CI"), "\"0\": [\"j\", []]",), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - fn test_report_files_dict(paths: &[&str], input: &str) -> PResult> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = 
report_files_dict.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - - let expected_files: Vec<_> = paths - .iter() - .map(|path| models::SourceFile::new(path)) - .collect(); - assert_eq!(report.files, expected_files); - } - res - } - - #[test] - fn test_report_files_dict_single_valid_file() { - assert_eq!( - test_report_files_dict( - &["src/report.rs"], - "{\"src/report.rs\": [0, [], {}, null]}", - ), - Ok(HashMap::from([(0, hash_id("src/report.rs"))])) - ); - } - - #[test] - fn test_report_files_dict_multiple_valid_files() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [1, [], {}, null]}", - ), Ok(HashMap::from([(0, hash_id("src/report.rs")), (1, hash_id("src/report/models.rs"))]))); - } - - #[test] - fn test_report_files_dict_multiple_valid_files_trailing_comma() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [1, [], {}, null],}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_files_dict_multiple_files_same_index() { - // TODO this is how winnow handles accumulating into collections but it's not - // the behavior that we want. we want to error - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [0, [], {}, null]}", - ), Ok(HashMap::from([(0, hash_id("src/report/models.rs"))]))); - } - - #[test] - fn test_report_files_dict_single_invalid_file() { - assert_eq!( - test_report_files_dict( - &["src/report.rs"], - "{\"src/report.rs\": [null, [], {}, null]}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_files_dict_invalid_file_after_valid_file() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "{\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [null, [], {}, null]}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_files_dict_wrong_type() { - assert_eq!(test_report_files_dict( - &["src/report.rs", "src/report/models.rs"], - "[\"src/report.rs\": [0, [], {}, null], \"src/report/models.rs\": [1, [], {}, null]]", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_files_dict_no_files() { - assert_eq!(test_report_files_dict(&[], "{}",), Ok(HashMap::new())); - } - - // This helper is for sessions that include "j" but not "d" or "u". - // Name-building behavior is tested separately + covered in the - // `fully_populated` test case. 
- fn test_report_sessions_dict( - jobs: &[Option<&str>], - input: &str, - ) -> PResult> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = report_sessions_dict.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - - let expected_uploads: Vec<_> = jobs - .iter() - .enumerate() - .map(|(i, name)| models::RawUpload { - id: i as i64, - job_name: name.map(str::to_owned), - ..Default::default() - }) - .collect(); - assert_eq!(report.uploads, expected_uploads); - } - res - } - - #[test] - fn test_report_sessions_dict_single_valid_session() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI")], - "{\"0\": {\"j\": \"codecov-rs CI\"}}", - ), - Ok(HashMap::from([(0, 0)])) - ); - } - - #[test] - fn test_report_sessions_dict_multiple_valid_sessions() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), Some("codecov-rs CI 2")], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}", - ), - Ok(HashMap::from([(0, 0), (1, 1)])) - ); - } - - #[test] - fn test_report_sessions_dict_multiple_valid_sessions_trailing_comma() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), Some("codecov-rs CI 2")], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"},}", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_sessions_dict_multiple_sessions_same_index() { - // TODO this is how winnow handles accumulating into collections but it's not - // the behavior that we want. we want to error - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), Some("codecov-rs CI 2")], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"0\": {\"j\": \"codecov-rs CI 2\"}}", - ), - Ok(HashMap::from([(0, 1)])) - ); - } - - #[test] - fn test_report_sessions_dict_single_malformed_session() { - assert_eq!( - test_report_sessions_dict(&[None], "{\"0\": {\"xj\": \"codecov-rs CI\"}}",), - Ok(HashMap::from([(0, 0)])) - ); - } - - #[test] - fn test_report_sessions_dict_invalid_session_after_valid_session() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI"), None], - "{\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"xj\": \"codecov-rs CI 2\"}}", - ), - Ok(HashMap::from([(0, 0), (1, 1)])) - ); - } - - #[test] - fn test_report_sessions_dict_wrong_type() { - assert_eq!( - test_report_sessions_dict( - &[Some("codecov-rs CI")], - "{\"0\": [\"j\": \"codecov-rs CI\"}]", - ), - Err(ErrMode::Cut(ContextError::new())) - ); - } - - #[test] - fn test_report_sessions_dict_no_sessions() { - assert_eq!(test_report_sessions_dict(&[], "{}",), Ok(HashMap::new())); - } - - fn test_report_json( - paths: &[&str], - jobs: &[&str], - input: &str, - ) -> PResult<(HashMap, HashMap)> { - let ctx = setup(); - let mut buf = TestStream { - input, - state: ctx.parse_ctx, - }; - - let res = parse_report_json.parse_next(&mut buf); - if res.is_ok() { - let report = buf.state.report_builder.build().unwrap(); - - let expected_files: Vec<_> = paths - .iter() - .map(|path| models::SourceFile::new(path)) - .collect(); - assert_eq!(report.files, expected_files); - - let expected_uploads: Vec<_> = jobs - .iter() - .enumerate() - .map(|(i, name)| models::RawUpload { - id: i as i64, - job_name: Some(name.to_string()), - ..Default::default() - }) - .collect(); - assert_eq!(report.uploads, expected_uploads); - } - res - } - - #[test] - fn test_report_json_simple_valid_case() { - assert_eq!(test_report_json( - &["src/report.rs"], - 
&["codecov-rs CI"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}}}", - ), Ok((HashMap::from([(0, hash_id("src/report.rs"))]), HashMap::from([(0, 0)])))) - } - - #[test] - fn test_report_json_two_files_two_sessions() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Ok((HashMap::from([(0, hash_id("src/report.rs")), (1, hash_id("src/report/models.rs"))]), HashMap::from([(0, 0), (1, 1)])))); - } - - #[test] - fn test_report_json_empty_files() { - assert_eq!(test_report_json( - &[], - &["codecov-rs CI","codecov-rs CI 2"], - "{\"files\": {}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Ok((HashMap::new(), HashMap::from([(0, 0), (1, 1)])))); - } - - #[test] - fn test_report_json_empty_sessions() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &[], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}, \"sessions\": {}}", - ), Ok((HashMap::from([(0, hash_id("src/report.rs")), (1, hash_id("src/report/models.rs"))]), HashMap::new()))); - } - - #[test] - fn test_report_json_empty() { - assert_eq!( - test_report_json(&[], &[], "{\"files\": {}, \"sessions\": {}}",), - Ok((HashMap::new(), HashMap::new())) - ); - } - - #[test] - fn test_report_json_sessions_before_files() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}, \"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_missing_files() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI","codecov-rs CI 2"], - "{\"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_missing_sessions() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_one_invalid_file() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [null, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"1\": {\"j\": \"codecov-rs CI 2\"}}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } - - #[test] - fn test_report_json_one_invalid_session() { - assert_eq!(test_report_json( - &["src/report.rs", "src/report/models.rs"], - &["codecov-rs CI", "codecov-rs CI 2"], - "{\"files\": {\"src/report.rs\": [0, {}, [], null], \"src/report/models.rs\": [1, {}, [], null]}, \"sessions\": {\"0\": {\"j\": \"codecov-rs CI\"}, \"j\": {\"xj\": \"codecov-rs CI 2\"}}}", - ), Err(ErrMode::Cut(ContextError::new()))); - } + #[test] + fn test_report_json_empty_sessions() { + let input = br#"{"files": 
{"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!( + report.files, + &[ + models::SourceFile::new("src/report.rs"), + models::SourceFile::new("src/report/models.rs") + ] + ); + assert_eq!(report.uploads, &[]); + } + + #[test] + fn test_report_json_empty() { + let input = br#"{"files": {}, "sessions": {}}"#; + + let mut report_builder = TestReportBuilder::default(); + let _parsed = parse_report_json(input, &mut report_builder).unwrap(); + + let report = report_builder.build().unwrap(); + assert_eq!(report.files, &[]); + assert_eq!(report.uploads, &[]); + } + + #[test] + fn test_report_json_missing_files() { + let input = + br#"{"sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); + } + + #[test] + fn test_report_json_missing_sessions() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); + } + + #[test] + fn test_report_json_one_invalid_file() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [null, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "1": {"j": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); + } + + #[test] + fn test_report_json_one_invalid_session() { + let input = br#"{"files": {"src/report.rs": [0, {}, [], null], "src/report/models.rs": [1, {}, [], null]}, "sessions": {"0": {"j": "codecov-rs CI"}, "j": {"xj": "codecov-rs CI 2"}}}"#; + + let mut report_builder = TestReportBuilder::default(); + parse_report_json(input, &mut report_builder).unwrap_err(); } } diff --git a/core/src/report/mod.rs b/core/src/report/mod.rs index f502a4d..1d924a6 100644 --- a/core/src/report/mod.rs +++ b/core/src/report/mod.rs @@ -6,7 +6,7 @@ pub use sqlite::{SqliteReport, SqliteReportBuilder, SqliteReportBuilderTx}; #[cfg(feature = "pyreport")] pub mod pyreport; -#[cfg(test)] +#[cfg(any(test, feature = "testing"))] pub mod test; use crate::error::Result; diff --git a/core/tests/test_pyreport_shim.rs b/core/tests/test_pyreport_shim.rs index d0b7f0a..58d5730 100644 --- a/core/tests/test_pyreport_shim.rs +++ b/core/tests/test_pyreport_shim.rs @@ -6,10 +6,9 @@ use std::{ }; use codecov_rs::{ - parsers::{ - common::ReportBuilderCtx, - pyreport, - pyreport::{chunks, report_json}, + parsers::pyreport::{ + self, chunks, + report_json::{self, ParsedReportJson}, }, report::{ models, pyreport::ToPyreport, Report, ReportBuilder, SqliteReport, SqliteReportBuilder, @@ -22,8 +21,6 @@ use winnow::Parser; mod common; -type ReportJsonStream<'a> = - report_json::ReportOutputStream<&'a str, SqliteReport, SqliteReportBuilder>; type ChunksStream<'a> = chunks::ReportOutputStream<&'a str, SqliteReport, SqliteReportBuilder>; struct Ctx { @@ -45,15 +42,6 @@ fn test_parse_report_json() { let rng_seed = 5; let mut rng = StdRng::seed_from_u64(rng_seed); - let test_ctx = setup(); - let parse_ctx = ReportBuilderCtx::new( - SqliteReportBuilder::new_with_seed(test_ctx.db_file, rng_seed).unwrap(), - ); - let mut buf = 
ReportJsonStream { - input: &input, - state: parse_ctx, - }; - let expected_files = vec![ models::SourceFile::new("src/report.rs"), models::SourceFile::new("src/report/models.rs"), @@ -84,13 +72,19 @@ fn test_parse_report_json() { let expected_json_sessions = HashMap::from([(0, expected_session.id)]); - let (actual_files, actual_sessions) = report_json::parse_report_json - .parse_next(&mut buf) + let test_ctx = setup(); + let mut report_builder = + SqliteReportBuilder::new_with_seed(test_ctx.db_file, rng_seed).unwrap(); + + let ParsedReportJson { + files: actual_files, + sessions: actual_sessions, + } = report_json::parse_report_json(input.as_bytes(), &mut report_builder) .expect("Failed to parse"); assert_eq!(actual_files, expected_json_files); assert_eq!(actual_sessions, expected_json_sessions); - let report = buf.state.report_builder.build().unwrap(); + let report = report_builder.build().unwrap(); let files = report.list_files().unwrap(); assert_eq!(files, expected_files);