add entrypoint for mmapped pyreport parser
matt-codecov committed Apr 24, 2024
1 parent cdb90d5 commit 1bace35
Showing 4 changed files with 189 additions and 19 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
@@ -17,6 +17,8 @@ uuid = { version = "1.8.0", features = ["v4"] }
# which should use a different hash function.
seahash = "4.1.0"

memmap2 = "0.9.4"

include_dir = "0.7.3"
lazy_static = "1.4.0"
strum = "0.26.1"
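As an aside on the new seahash dependency above: the integration tests below predict record IDs with a `hash_id` helper, which suggests IDs are derived by hashing a path or name. A minimal sketch of that idea follows; the helper's real implementation is not part of this commit, so this is only an assumption for illustration.

fn hash_id(key: &str) -> i64 {
    // Hypothetical sketch: seahash::hash returns a u64; an `as` cast reinterprets
    // the bits as i64 so the value fits SQLite's signed integer columns.
    seahash::hash(key.as_bytes()) as i64
}

Under that assumption, `hash_id("src/report.rs")` would match the `id` fields used for `expected_files` in the tests below.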
65 changes: 65 additions & 0 deletions src/parsers/pyreport_shim.rs
@@ -1,3 +1,68 @@
use std::{fs::File, path::PathBuf};

use memmap2::Mmap;

use crate::{
parsers::ReportBuilderCtx,
report::{ReportBuilder, SqliteReport, SqliteReportBuilder},
};

pub mod report_json;

pub mod chunks;

/// Parses the two parts of our Python report class and reshapes the data into a
/// `SqliteReport`.
///
/// Reports in our Python codebase are serialized in two parts:
/// - Report JSON, which describes the files and sessions in the report
/// - Chunks file, which describes line-by-line coverage data for each file
///
/// The parser for the report JSON inserts a
/// [`crate::report::models::SourceFile`] for each file
/// and a [`crate::report::models::Context`] for each session. It returns two
/// hashmaps: one which maps each file's "chunk index" to the database PK for
/// the `SourceFile` that was inserted for it, and one which maps each session's
/// "session_id" to the database PK for the `Context` that was inserted for it.
///
/// The parser for the chunks file inserts a
/// [`crate::report::models::CoverageSample`] (and possibly other records) for
/// each coverage measurement contained in the chunks file. It uses the
/// results of the report JSON parser to figure out the appropriate FKs to
/// associate a measurement with its `SourceFile` and `Context`(s).
///
/// TODO: Make this unit testable (currently relying on integration tests)
pub fn parse_pyreport(
report_json_file: &File,
chunks_file: &File,
out_path: PathBuf,
) -> Result<SqliteReport, std::io::Error> {
let report_builder = SqliteReportBuilder::new(out_path);

// Memory-map the input file so we don't have to read the whole thing into RAM
let mmap_handle = unsafe { Mmap::map(report_json_file)? };
let buf = unsafe { std::str::from_utf8_unchecked(&mmap_handle[..]) };
let mut stream = report_json::ReportOutputStream::<&str, SqliteReport, SqliteReportBuilder> {
input: buf,
state: ReportBuilderCtx::new(report_builder),
};
// TODO handle error
let (files, sessions) =
report_json::parse_report_json(&mut stream).expect("Failed to parse report JSON");

// Replace our mmap handle so the first one can be unmapped
let mmap_handle = unsafe { Mmap::map(chunks_file)? };
let buf = unsafe { std::str::from_utf8_unchecked(&mmap_handle[..]) };

// Move `report_builder` from the report JSON's parse context to this one
let chunks_ctx = chunks::ParseCtx::new(stream.state.report_builder, files, sessions);
let mut chunks_stream = chunks::ReportOutputStream::<&str, SqliteReport, SqliteReportBuilder> {
input: buf,
state: chunks_ctx,
};
// TODO handle error
chunks::parse_chunks_file(&mut chunks_stream).expect("Failed to parse chunks file");

// Build and return the `SqliteReport`
Ok(chunks_stream.state.db.report_builder.build())
}
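For context, a minimal caller sketch of the new entrypoint, mirroring the integration test added below. The file names and output path are placeholders, and the function assumes both inputs are valid UTF-8, since the mmapped bytes are converted with `from_utf8_unchecked`.

use std::{fs::File, path::PathBuf};

use codecov_rs::parsers::pyreport_shim;

fn main() -> Result<(), std::io::Error> {
    // Placeholder paths; any report JSON + chunks pair serialized by the Python
    // codebase will do.
    let report_json_file = File::open("report_json.txt")?;
    let chunks_file = File::open("chunks.txt")?;

    // Writes the SQLite database to the given path and returns the finished report.
    let _report = pyreport_shim::parse_pyreport(
        &report_json_file,
        &chunks_file,
        PathBuf::from("report.sqlite"),
    )?;
    Ok(())
}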
2 changes: 1 addition & 1 deletion tests/common/mod.rs
@@ -8,5 +8,5 @@ pub fn sample_data_path() -> PathBuf {
}

pub fn read_sample_file(path: &Path) -> String {
return read_to_string(sample_data_path().join(path)).ok().unwrap();
read_to_string(sample_data_path().join(path)).ok().unwrap()
}
139 changes: 121 additions & 18 deletions tests/test_pyreport_shim.rs
@@ -1,7 +1,12 @@
use std::{collections::HashMap, path::Path};
use std::{
collections::HashMap,
fs::File,
path::{Path, PathBuf},
};

use codecov_rs::{
parsers::{
pyreport_shim,
pyreport_shim::{chunks, report_json},
ReportBuilderCtx,
},
@@ -18,19 +23,16 @@ type ChunksStream<'a> = chunks::ReportOutputStream<&'a str, SqliteReport, Sqlite

struct Ctx {
_temp_dir: TempDir,
parse_ctx: ReportBuilderCtx<SqliteReport, SqliteReportBuilder>,
db_file: PathBuf,
}

fn setup() -> Ctx {
let temp_dir = TempDir::new().ok().unwrap();
let db_file = temp_dir.path().to_owned().join("db.sqlite");

let report_builder = SqliteReportBuilder::new(db_file);
let parse_ctx = ReportBuilderCtx::new(report_builder);

Ctx {
_temp_dir: temp_dir,
parse_ctx,
db_file,
}
}

Expand All @@ -42,10 +44,11 @@ fn hash_id(key: &str) -> i64 {
fn test_parse_report_json() {
let input = common::read_sample_file(Path::new("codecov-rs-reports-json-d2a9ba1.txt"));

let ctx = setup();
let test_ctx = setup();
let parse_ctx = ReportBuilderCtx::new(SqliteReportBuilder::new(test_ctx.db_file));
let mut buf = ReportJsonStream {
input: &input,
state: ctx.parse_ctx,
state: parse_ctx,
};

let expected_files = vec![
@@ -97,7 +100,8 @@ fn test_parse_report_json() {
#[test]
fn test_parse_chunks_file() {
let input = common::read_sample_file(Path::new("codecov-rs-chunks-d2a9ba1.txt"));
let mut ctx = setup();
let test_ctx = setup();
let mut report_builder = SqliteReportBuilder::new(test_ctx.db_file);

// Pretend `parse_report_json` has already run
let mut report_json_files = HashMap::new();
@@ -109,26 +113,20 @@ fn test_parse_chunks_file() {
.iter()
.enumerate()
{
let file = ctx
.parse_ctx
.report_builder
.insert_file(file.to_string())
.unwrap();
let file = report_builder.insert_file(file.to_string()).unwrap();
report_json_files.insert(i, file.id);
}

// Pretend `parse_report_json` has already run
let mut report_json_sessions = HashMap::new();
let session = ctx
.parse_ctx
.report_builder
let session = report_builder
.insert_context(models::ContextType::Upload, "codecov-rs CI")
.unwrap();
report_json_sessions.insert(0, session.id);

// Set up to call the chunks parser
let chunks_parse_ctx = chunks::ParseCtx::new(
ctx.parse_ctx.report_builder,
report_builder,
report_json_files.clone(),
report_json_sessions.clone(),
);
@@ -213,3 +211,108 @@ fn test_parse_chunks_file() {
);
}
}

#[test]
fn test_parse_pyreport() {
let report_json_file =
File::open(common::sample_data_path().join("codecov-rs-reports-json-d2a9ba1.txt"))
.expect("Failed to open report json file");
let chunks_file = File::open(common::sample_data_path().join("codecov-rs-chunks-d2a9ba1.txt"))
.expect("Failed to open chunks file");
let test_ctx = setup();

let report = pyreport_shim::parse_pyreport(&report_json_file, &chunks_file, test_ctx.db_file)
.expect("Failed to parse pyreport");

let expected_files = vec![
models::SourceFile {
id: hash_id("src/report.rs"),
path: "src/report.rs".to_string(),
},
models::SourceFile {
id: hash_id("src/report/models.rs"),
path: "src/report/models.rs".to_string(),
},
models::SourceFile {
id: hash_id("src/report/schema.rs"),
path: "src/report/schema.rs".to_string(),
},
];

let expected_sessions = vec![models::Context {
id: hash_id("codecov-rs CI"),
context_type: models::ContextType::Upload,
name: "codecov-rs CI".to_string(),
}];

// Helper function for creating our expected values
fn make_sample(source_file_id: i64, line_no: i64, hits: i64) -> models::CoverageSample {
models::CoverageSample {
id: uuid::Uuid::nil(), // Ignored
source_file_id,
line_no,
coverage_type: models::CoverageType::Line,
hits: Some(hits),
hit_branches: None,
total_branches: None,
}
}
// (start_line, end_line, hits)
let covered_lines: [Vec<(i64, i64, i64)>; 3] = [
vec![
(17, 25, 3),
(39, 43, 2),
(45, 49, 1),
(51, 53, 1),
(55, 59, 1),
(61, 78, 1),
],
vec![
(5, 5, 0),
(12, 12, 0),
(22, 22, 0),
(33, 33, 0),
(45, 45, 1),
],
vec![
(3, 3, 0),
(10, 16, 0),
(18, 27, 0),
(29, 39, 0),
(41, 48, 0),
(50, 50, 0),
(51, 52, 5),
(53, 54, 6),
(55, 56, 5),
],
];
let mut expected_coverage_samples = Vec::new();
for (i, file) in covered_lines.iter().enumerate() {
for (start_line, end_line, hits) in file {
for line_no in *start_line..=*end_line {
expected_coverage_samples.push(make_sample(expected_files[i].id, line_no, *hits));
}
}
}

let actual_coverage_samples = report
.list_coverage_samples()
.expect("Failed to list coverage samples");
let actual_contexts = report.list_contexts().expect("Failed to list contexts");
assert_eq!(actual_contexts, expected_sessions);
assert_eq!(
actual_coverage_samples.len(),
expected_coverage_samples.len()
);
for i in 0..actual_coverage_samples.len() {
expected_coverage_samples[i].id = actual_coverage_samples[i].id;
assert_eq!(actual_coverage_samples[i], expected_coverage_samples[i]);

assert_eq!(
report
.list_contexts_for_sample(&actual_coverage_samples[i])
.unwrap(),
actual_contexts
);
}
}
