sqlite->pyreport: stop serializing session_totals, just use null #26

Merged · 1 commit · Aug 22, 2024
src/parsers/pyreport/report_json.rs · 13 changes: 8 additions & 5 deletions
@@ -32,7 +32,7 @@ pub type ReportOutputStream<S, R, B> = Stateful<S, ReportBuilderCtx<R, B>>;
 /// "filename.rs": [
 ///     chunks_index: int,
 ///     file_totals: ReportTotals,
-///     session_totals: SessionTotalsArray,
+///     session_totals: null, // (formerly SessionTotalsArray, but ignored now)
 ///     diff_totals: ReportTotals (probably),
 /// ]
 /// ```
@@ -41,9 +41,12 @@ pub type ReportOutputStream<S, R, B> = Stateful<S, ReportBuilderCtx<R, B>>;
 /// - [`ReportTotals`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L30-L45)
 /// - [`SessionTotalsArray`](https://github.com/codecov/shared/blob/e97a9f422a6e224b315d6dc3821f9f5ebe9b2ddd/shared/reports/types.py#L263-L272)
 ///
-/// `SessionTotalsArray` will normally be a dict mapping a session ID to a
-/// `SessionTotals` (which is just a type alias for `ReportTotals`) but there is
-/// a legacy format.
+/// `SessionTotalsArray` no longer exists, but older reports may still have
+/// it. It's a dict mapping a session ID to a `SessionTotals` (which is just
+/// a type alias for `ReportTotals`), plus a "meta" key with extra information
+/// such as how many sessions there are in the map. There's an even older
+/// format which is just a flat list. In any case, we ignore the field now
+/// and always write out `null`.
 ///
 /// Input example:
 /// ```notrust
@@ -64,7 +67,7 @@ pub type ReportOutputStream<S, R, B> = Stateful<S, ReportBuilderCtx<R, B>>;
 ///             0,  # > complexity_total
 ///             0   # > diff
 ///         ],
-///         { # session totals
+///         { # session totals (usually null nowadays)
 ///             "0": [ # > key: session id
 ///                 0,  # > files
 ///                 45, # > lines
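
The two legacy `session_totals` shapes that the updated doc comment describes can be sketched like this (illustrative only; the exact `"meta"` layout and the abbreviated totals values are assumptions, not taken from this diff):

```rust
use serde_json::json;

fn main() {
    // Newer legacy shape: a dict mapping session id -> SessionTotals, plus a
    // "meta" key recording how many sessions the map holds (layout assumed).
    let dict_form = json!({
        "0": [0, 45, 35, 10], // abbreviated ReportTotals-style list
        "meta": { "session_count": 1 }
    });

    // Even older legacy shape: a flat list indexed by session id.
    let list_form = json!([[0, 45, 35, 10]]);

    // After this PR, the field is ignored on input and always written as null.
    let written = json!(null);

    println!("{dict_form}\n{list_form}\n{written}");
}
```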
src/report/pyreport/queries/files_to_report_json.sql · 111 changes: 38 additions & 73 deletions
@@ -1,13 +1,25 @@
 -- Determine whether each `coverage_sample` record is a hit/miss/partial/skip.
 -- Normalize complexity fields.
 with samples_categorized as (
     select
         coverage_sample.raw_upload_id,
         coverage_sample.local_sample_id,
         coverage_sample.source_file_id,
         coverage_sample.line_no,
         coverage_sample.coverage_type,
-        iif(coverage_sample.hits > 0 or coverage_sample.hit_branches >= coverage_sample.total_branches, 1, 0) as hit,
-        iif(coverage_sample.hits = 0 or coverage_sample.hit_branches = 0, 1, 0) as miss,
-        iif(coverage_sample.hit_branches > 0 and coverage_sample.hit_branches < coverage_sample.total_branches, 1, 0) as partial,
+        iif(
+            coverage_sample.hits > 0 or coverage_sample.hit_branches >= coverage_sample.total_branches,
+            2, -- hit
+            iif(
+                coverage_sample.hits = 0 or coverage_sample.hit_branches = 0,
+                0, -- miss
+                iif(
+                    coverage_sample.hit_branches > 0 and coverage_sample.hit_branches < coverage_sample.total_branches,
+                    1, -- partial
+                    -1 -- skipped
+                )
+            )
+        ) as coverage_status,
+        -- If a pyreport only has total_complexity, it will basically swap total_complexity and hit_complexity_paths
+        -- when pre-computing its totals/statistics. This logic performs that swap here.
         iif(method_data.hit_complexity_paths is null, method_data.total_complexity, method_data.hit_complexity_paths) as hit_complexity_paths,
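
A rough Rust mirror of what the nested `iif()` computes, for readers who find SQL tri-state logic hard to scan (a sketch, not code from this crate; `Option` stands in for SQL NULL, where any comparison involving NULL is not true):

```rust
/// skip < miss < partial < hit, so a plain max() later picks the
/// "most covered" status recorded for a line.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum CoverageStatus {
    Skipped = -1,
    Miss = 0,
    Partial = 1,
    Hit = 2,
}

fn categorize(hits: Option<i64>, hit_branches: Option<i64>, total_branches: Option<i64>) -> CoverageStatus {
    let gt0 = |v: Option<i64>| v.map_or(false, |x| x > 0);
    let eq0 = |v: Option<i64>| v.map_or(false, |x| x == 0);
    let full = hit_branches.zip(total_branches).map_or(false, |(h, t)| h >= t);
    let short = hit_branches.zip(total_branches).map_or(false, |(h, t)| h < t);

    if gt0(hits) || full {
        CoverageStatus::Hit
    } else if eq0(hits) || eq0(hit_branches) {
        CoverageStatus::Miss
    } else if gt0(hit_branches) && short {
        CoverageStatus::Partial
    } else {
        CoverageStatus::Skipped // e.g. every column NULL
    }
}

/// The complexity normalization from the same CTE: if only total_complexity
/// was reported, pyreport treats it as hit_complexity_paths, so swap it in.
fn hit_complexity_paths(hit: Option<i64>, total: Option<i64>) -> Option<i64> {
    hit.or(total) // iif(hit is null, total, hit)
}

fn main() {
    assert_eq!(categorize(Some(3), None, None), CoverageStatus::Hit);
    assert_eq!(categorize(None, Some(1), Some(2)), CoverageStatus::Partial);
    assert_eq!(categorize(None, None, None), CoverageStatus::Skipped);
    assert_eq!(hit_complexity_paths(None, Some(4)), Some(4));
}
```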
@@ -20,6 +32,8 @@ on
     method_data.raw_upload_id = coverage_sample.raw_upload_id
     and method_data.local_sample_id = coverage_sample.local_sample_id
 ),
+-- Compute the chunks file index of each `source_file` record. Must match the
+-- corresponding logic in `samples_to_chunks.sql`.
 source_files_with_index as (
     select
         row_number() over (order by source_file.id) - 1 as chunk_index,
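
The chunks-index rule the new comment references is simple enough to state in Rust (hypothetical helper, not from the crate): files ranked by `source_file.id` get consecutive zero-based indices, exactly `row_number() over (order by source_file.id) - 1`:

```rust
// Hypothetical illustration of the chunk-index assignment above.
fn chunk_indices(mut source_file_ids: Vec<i64>) -> Vec<(usize, i64)> {
    source_file_ids.sort_unstable();
    source_file_ids.into_iter().enumerate().collect() // (chunk_index, id)
}

fn main() {
    // Ids need not be dense; the index is just the rank in id order.
    assert_eq!(chunk_indices(vec![12, 3, 7]), vec![(0, 3), (1, 7), (2, 12)]);
}
```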
@@ -28,91 +42,42 @@ select
 from
     source_file
 ),
-file_sessions_flattened as (
+-- Each (source_file, line) has potentially many samples from different sessions
+-- and this CTE flattens them into a single record per (source_file, line).
+file_lines_flattened as (
     select
         samples_categorized.source_file_id,
         samples_categorized.line_no,
         samples_categorized.coverage_type,
-        max(samples_categorized.hit) as hit,
-        max(samples_categorized.miss) as miss,
-        max(samples_categorized.partial) as partial,
+        -- We want to pick the "most covered" status for this line. Since 2 is hit,
+        -- 1 is partial, 0 is miss, and -1 is skip, a simple `max()` does the trick.
+        -- If it was ever recorded as a full hit, that will override any partials
+        -- or misses, and so on.
+        max(samples_categorized.coverage_status) as coverage_status,
         max(samples_categorized.hit_complexity_paths) as hit_complexity_paths,
         max(samples_categorized.total_complexity) as total_complexity
     from
         samples_categorized
     group by
         1, 2, 3
-),
-file_totals as (
-    select
-        file_sessions_flattened.source_file_id,
-        count(*) as file_lines,
-        sum(file_sessions_flattened.hit) as file_hits,
-        sum(file_sessions_flattened.miss) as file_misses,
-        sum(file_sessions_flattened.partial) as file_partials,
-        sum(iif(file_sessions_flattened.coverage_type = 'b', 1, 0)) as file_branches,
-        sum(iif(file_sessions_flattened.coverage_type = 'm', 1, 0)) as file_methods,
-        coalesce(sum(file_sessions_flattened.hit_complexity_paths), 0) as file_hit_complexity_paths,
-        coalesce(sum(file_sessions_flattened.total_complexity), 0) as file_total_complexity
-    from
-        file_sessions_flattened
-    group by
-        1
-),
-session_indices as (
-    select
-        cast(row_number() over (order by raw_upload.id) - 1 as text) as session_index,
-        raw_upload.id as raw_upload_id
-    from
-        raw_upload
-),
-file_session_totals as (
-    select
-        session_indices.session_index,
-        session_indices.raw_upload_id,
-        samples_categorized.source_file_id,
-        count(*) as file_session_lines,
-        sum(samples_categorized.hit) as file_session_hits,
-        sum(samples_categorized.miss) as file_session_misses,
-        sum(samples_categorized.partial) as file_session_partials,
-        coalesce(sum(samples_categorized.hit_complexity_paths), 0) as file_session_hit_complexity_paths,
-        coalesce(sum(samples_categorized.total_complexity), 0) as file_session_total_complexity
-    from
-        samples_categorized
-    left join
-        session_indices
-    on
-        session_indices.raw_upload_id = samples_categorized.raw_upload_id
-    group by
-        1, 2, 3
 )
 select
     source_files_with_index.chunk_index,
     source_files_with_index.id,
     source_files_with_index.path,
-    file_totals.file_lines,
-    file_totals.file_hits,
-    file_totals.file_misses,
-    file_totals.file_partials,
-    file_totals.file_branches,
-    file_totals.file_methods,
-    file_totals.file_hit_complexity_paths,
-    file_totals.file_total_complexity,
-    file_session_totals.session_index,
-    file_session_totals.file_session_lines,
-    file_session_totals.file_session_hits,
-    file_session_totals.file_session_misses,
-    file_session_totals.file_session_partials,
-    file_session_totals.file_session_hit_complexity_paths,
-    file_session_totals.file_session_total_complexity
+    count(*) as file_lines,
+    sum(iif(file_lines_flattened.coverage_status = 2, 1, 0)) as file_hits,
+    sum(iif(file_lines_flattened.coverage_status = 0, 1, 0)) as file_misses,
+    sum(iif(file_lines_flattened.coverage_status = 1, 1, 0)) as file_partials,
+    sum(iif(file_lines_flattened.coverage_type = 'b', 1, 0)) as file_branches,
+    sum(iif(file_lines_flattened.coverage_type = 'm', 1, 0)) as file_methods,
+    coalesce(sum(file_lines_flattened.hit_complexity_paths), 0) as file_hit_complexity_paths,
+    coalesce(sum(file_lines_flattened.total_complexity), 0) as file_total_complexity
 from
-    source_files_with_index
+    file_lines_flattened
 left join
-    file_totals
-on
-    source_files_with_index.id = file_totals.source_file_id
-left join
-    file_session_totals
+    source_files_with_index
 on
-    source_files_with_index.id = file_session_totals.source_file_id;
-
+    file_lines_flattened.source_file_id = source_files_with_index.id
+group by
+    1, 2, 3
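
Putting the rewritten query together, a compact sketch (illustrative, reusing the -1/0/1/2 status encoding) of what `file_lines_flattened` plus the final aggregation compute for one file: flatten each line's per-session statuses with `max()`, then count statuses per file:

```rust
// (lines, hits, misses, partials) for one file, given each line's recorded
// statuses across all sessions that touched it.
fn file_totals(lines: &[Vec<i8>]) -> (usize, usize, usize, usize) {
    let flattened: Vec<i8> = lines
        .iter()
        .filter_map(|statuses| statuses.iter().copied().max()) // most covered wins
        .collect();
    let count = |status: i8| flattened.iter().filter(|&&s| s == status).count();
    (flattened.len(), count(2), count(0), count(1))
}

fn main() {
    // Line 1 was a miss in one session and a hit in another; line 2 was a
    // partial in its only session. The old query also kept per-session rows
    // to feed session_totals; since that field is now always null, only this
    // flattened per-file view is needed.
    assert_eq!(file_totals(&[vec![0, 2], vec![1]]), (2, 1, 0, 1));
}
```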