Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lcov report improvements part 2 #1851

Merged
merged 16 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 129 additions & 63 deletions coverage/lcovreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,129 @@ def line_hash(line: str) -> str:
return base64.b64encode(hashed).decode("ascii").rstrip("=")


def lcov_lines(
analysis: Analysis,
lines: list[int],
source_lines: list[str],
outfile: IO[str],
) -> None:
"""Emit line coverage records for an analyzed file."""
hash_suffix = ""
for line in lines:
if source_lines:
hash_suffix = "," + line_hash(source_lines[line-1])
# Q: can we get info about the number of times a statement is
# executed? If so, that should be recorded here.
hit = int(line not in analysis.missing)
outfile.write(f"DA:{line},{hit}{hash_suffix}\n")

if analysis.numbers.n_statements > 0:
outfile.write(f"LF:{analysis.numbers.n_statements}\n")
outfile.write(f"LH:{analysis.numbers.n_executed}\n")


def lcov_functions(
fr: FileReporter,
file_analysis: Analysis,
outfile: IO[str],
) -> None:
"""Emit function coverage records for an analyzed file."""
# lcov 2.2 introduces a new format for function coverage records.
# We continue to generate the old format because we don't know what
# version of the lcov tools will be used to read this report.

# "and region.lines" below avoids a crash due to a bug in PyPy 3.8
# where, for whatever reason, when collecting data in --branch mode,
# top-level functions have an empty lines array. Instead we just don't
# emit function records for those.

# suppressions because of https://github.com/pylint-dev/pylint/issues/9923
functions = [
(min(region.start, min(region.lines)), #pylint: disable=nested-min-max
max(region.start, max(region.lines)), #pylint: disable=nested-min-max
region)
for region in fr.code_regions()
if region.kind == "function" and region.lines
]
if not functions:
return

functions.sort()
functions_hit = 0
for first_line, last_line, region in functions:
# A function counts as having been executed if any of it has been
# executed.
analysis = file_analysis.narrow(region.lines)
hit = int(analysis.numbers.n_executed > 0)
functions_hit += hit

outfile.write(f"FN:{first_line},{last_line},{region.name}\n")
outfile.write(f"FNDA:{hit},{region.name}\n")

outfile.write(f"FNF:{len(functions)}\n")
outfile.write(f"FNH:{functions_hit}\n")


def lcov_arcs(
fr: FileReporter,
analysis: Analysis,
lines: list[int],
outfile: IO[str],
) -> None:
"""Emit branch coverage records for an analyzed file."""
branch_stats = analysis.branch_stats()
executed_arcs = analysis.executed_branch_arcs()
missing_arcs = analysis.missing_branch_arcs()

for line in lines:
if line not in branch_stats:
continue

# This is only one of several possible ways to map our sets of executed
# and not-executed arcs to BRDA codes. It seems to produce reasonable
# results when fed through genhtml.
_, taken = branch_stats[line]

if taken == 0:
# When _none_ of the out arcs from 'line' were executed,
# this probably means 'line' was never executed at all.
# Cross-check with the line stats.
assert len(executed_arcs[line]) == 0
assert line in analysis.missing
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our coverage workflow is now failing due to this line, when running with version 7.6.3.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you provide us with a way to reproduce it? Failing that, can you add this line before the assert, and show us the output?

print(f"{line=}, {analysis.missing=}")

(this should have been on the assert line, my bad)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm seeing this same error too, but only in CI so far. I haven't been able to reproduce it locally so I can't put the print() line in either...

destinations = [
(dst, "-") for dst in missing_arcs[line]
]
else:
# Q: can we get counts of the number of times each arc was executed?
# branch_stats has "total" and "taken" counts for each branch,
# but it doesn't have "taken" broken down by destination.
destinations = [
(dst, "1") for dst in executed_arcs[line]
]
destinations.extend(
(dst, "0") for dst in missing_arcs[line]
)

# Sort exit arcs after normal arcs. Exit arcs typically come from
# an if statement, at the end of a function, with no else clause.
# This structure reads like you're jumping to the end of the function
# when the conditional expression is false, so it should be presented
# as the second alternative for the branch, after the alternative that
# enters the if clause.
destinations.sort(key=lambda d: (d[0] < 0, d))

for dst, hit in destinations:
branch = fr.arc_description(line, dst)
outfile.write(f"BRDA:{line},0,{branch},{hit}\n")

# Summary of the branch coverage.
brf = sum(t for t, k in branch_stats.values())
brh = brf - sum(t - k for t, k in branch_stats.values())
if brf > 0:
outfile.write(f"BRF:{brf}\n")
outfile.write(f"BRH:{brh}\n")


class LcovReporter:
"""A reporter for writing LCOV coverage reports."""

Expand Down Expand Up @@ -85,72 +208,15 @@ def lcov_file(

outfile.write(f"SF:{rel_fname}\n")

lines = sorted(analysis.statements)
if self.config.lcov_line_checksums:
source_lines = fr.source().splitlines()
else:
source_lines = []

# Emit a DA: record for each line of the file.
lines = sorted(analysis.statements)
hash_suffix = ""
for line in lines:
if self.config.lcov_line_checksums:
hash_suffix = "," + line_hash(source_lines[line-1])
# Q: can we get info about the number of times a statement is
# executed? If so, that should be recorded here.
hit = int(line not in analysis.missing)
outfile.write(f"DA:{line},{hit}{hash_suffix}\n")

if analysis.numbers.n_statements > 0:
outfile.write(f"LF:{analysis.numbers.n_statements}\n")
outfile.write(f"LH:{analysis.numbers.n_executed}\n")

# More information dense branch coverage data, if available.
lcov_lines(analysis, lines, source_lines, outfile)
lcov_functions(fr, analysis, outfile)
if analysis.has_arcs:
branch_stats = analysis.branch_stats()
executed_arcs = analysis.executed_branch_arcs()
missing_arcs = analysis.missing_branch_arcs()

for line in lines:
if line in branch_stats:
# The meaning of a BRDA: line is not well explained in the lcov
# documentation. Based on what genhtml does with them, however,
# the interpretation is supposed to be something like this:
# BRDA: <line>, <block>, <branch>, <hit>
# where <line> is the source line number of the *origin* of the
# branch; <block> is an arbitrary number which distinguishes multiple
# control flow operations on a single line; <branch> is an arbitrary
# number which distinguishes the possible destinations of the specific
# control flow operation identified by <line> + <block>; and <hit> is
# either the hit count for <line> + <block> + <branch> or "-" meaning
# that <line> + <block> was never *reached*. <line> must be >= 1,
# and <block>, <branch>, <hit> must be >= 0.

# This is only one possible way to map our sets of executed and
# not-executed arcs to BRDA codes. It seems to produce reasonable
# results when fed through genhtml.

# Q: can we get counts of the number of times each arc was executed?
# branch_stats has "total" and "taken" counts for each branch, but it
# doesn't have "taken" broken down by destination.
destinations = {}
for dst in executed_arcs[line]:
destinations[(int(dst < 0), abs(dst))] = 1
for dst in missing_arcs[line]:
destinations[(int(dst < 0), abs(dst))] = 0

if all(v == 0 for v in destinations.values()):
# When _none_ of the out arcs from 'line' were executed, presume
# 'line' was never reached.
for branch, _ in enumerate(sorted(destinations.keys())):
outfile.write(f"BRDA:{line},0,{branch},-\n")
else:
for branch, (_, hit) in enumerate(sorted(destinations.items())):
outfile.write(f"BRDA:{line},0,{branch},{hit}\n")

# Summary of the branch coverage.
brf = sum(t for t, k in branch_stats.values())
brh = brf - sum(t - k for t, k in branch_stats.values())
if brf > 0:
outfile.write(f"BRF:{brf}\n")
outfile.write(f"BRH:{brh}\n")
lcov_arcs(fr, analysis, lines, outfile)

outfile.write("end_of_record\n")
81 changes: 51 additions & 30 deletions coverage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,32 +346,45 @@ def exit_counts(self) -> dict[TLineNo, int]:

return exit_counts

def _finish_action_msg(self, action_msg: str | None, end: TLineNo) -> str:
"""Apply some defaulting and formatting to an arc's description."""
if action_msg is None:
if end < 0:
action_msg = "jump to the function exit"
else:
action_msg = "jump to line {lineno}"
action_msg = action_msg.format(lineno=end)
return action_msg

def missing_arc_description(self, start: TLineNo, end: TLineNo) -> str:
"""Provide an English sentence describing a missing arc."""
if self._missing_arc_fragments is None:
self._analyze_ast()
assert self._missing_arc_fragments is not None

actual_start = start
fragment_pairs = self._missing_arc_fragments.get((start, end), [(None, None)])

msgs = []
for smsg, emsg in fragment_pairs:
if emsg is None:
if end < 0:
emsg = "didn't jump to the function exit"
else:
emsg = "didn't jump to line {lineno}"
emsg = emsg.format(lineno=end)

msg = f"line {actual_start} {emsg}"
if smsg is not None:
msg += f" because {smsg.format(lineno=actual_start)}"
for missing_cause_msg, action_msg in fragment_pairs:
action_msg = self._finish_action_msg(action_msg, end)
msg = f"line {start} didn't {action_msg}"
if missing_cause_msg is not None:
msg += f" because {missing_cause_msg.format(lineno=start)}"

msgs.append(msg)

return " or ".join(msgs)

def arc_description(self, start: TLineNo, end: TLineNo) -> str:
"""Provide an English description of an arc's effect."""
if self._missing_arc_fragments is None:
self._analyze_ast()
assert self._missing_arc_fragments is not None

fragment_pairs = self._missing_arc_fragments.get((start, end), [(None, None)])
action_msg = self._finish_action_msg(fragment_pairs[0][1], end)
return action_msg


class ByteParser:
"""Parse bytecode to understand the structure of code."""
Expand Down Expand Up @@ -452,7 +465,7 @@ class ArcStart:

`lineno` is the line number the arc starts from.

`cause` is an English text fragment used as the `startmsg` for
`cause` is an English text fragment used as the `missing_cause_msg` for
AstArcAnalyzer.missing_arc_fragments. It will be used to describe why an
arc wasn't executed, so should fit well into a sentence of the form,
"Line 17 didn't run because {cause}." The fragment can include "{lineno}"
Expand Down Expand Up @@ -486,10 +499,21 @@ def __call__(
self,
start: TLineNo,
end: TLineNo,
smsg: str | None = None,
emsg: str | None = None,
missing_cause_msg: str | None = None,
action_msg: str | None = None,
) -> None:
...
"""
Record an arc from `start` to `end`.

`missing_cause_msg` is a description of the reason the arc wasn't
taken if it wasn't taken. For example, "the condition on line 10 was
never true."

`action_msg` is a description of what the arc does, like "jump to line
10" or "exit from function 'fooey'."

"""


TArcFragments = Dict[TArc, List[Tuple[Optional[str], Optional[str]]]]

Expand Down Expand Up @@ -550,15 +574,15 @@ def process_raise_exits(self, exits: set[ArcStart], add_arc: TAddArcFn) -> bool:
for xit in exits:
add_arc(
xit.lineno, -self.start, xit.cause,
f"didn't except from function {self.name!r}",
f"except from function {self.name!r}",
)
return True

def process_return_exits(self, exits: set[ArcStart], add_arc: TAddArcFn) -> bool:
for xit in exits:
add_arc(
xit.lineno, -self.start, xit.cause,
f"didn't return from function {self.name!r}",
f"return from function {self.name!r}",
)
return True

Expand Down Expand Up @@ -602,10 +626,10 @@ class AstArcAnalyzer:
`missing_arc_fragments`: a dict mapping (from, to) arcs to lists of
message fragments explaining why the arc is missing from execution::

{ (start, end): [(startmsg, endmsg), ...], }
{ (start, end): [(missing_cause_msg, action_msg), ...], }

For an arc starting from line 17, they should be usable to form complete
sentences like: "Line 17 {endmsg} because {startmsg}".
sentences like: "Line 17 didn't {action_msg} because {missing_cause_msg}".

NOTE: Starting in July 2024, I've been whittling this down to only report
arc that are part of true branches. It's not clear how far this work will
Expand Down Expand Up @@ -716,30 +740,27 @@ def _code_object__FunctionDef(self, node: ast.FunctionDef) -> None:

def _code_object__ClassDef(self, node: ast.ClassDef) -> None:
start = self.line_for_node(node)
exits = self.process_body(node.body)#, from_start=ArcStart(start))
exits = self.process_body(node.body)
for xit in exits:
self.add_arc(
xit.lineno, -start, xit.cause,
f"didn't exit the body of class {node.name!r}",
)
self.add_arc(xit.lineno, -start, xit.cause, f"exit class {node.name!r}")

def add_arc(
self,
start: TLineNo,
end: TLineNo,
smsg: str | None = None,
emsg: str | None = None,
missing_cause_msg: str | None = None,
action_msg: str | None = None,
) -> None:
"""Add an arc, including message fragments to use if it is missing."""
if self.debug: # pragma: debugging
print(f"Adding possible arc: ({start}, {end}): {smsg!r}, {emsg!r}")
print(f"Adding possible arc: ({start}, {end}): {missing_cause_msg!r}, {action_msg!r}")
print(short_stack(), end="\n\n")
self.arcs.add((start, end))
if start in self.current_with_starts:
self.with_entries.add((start, end))

if smsg is not None or emsg is not None:
self.missing_arc_fragments[(start, end)].append((smsg, emsg))
if missing_cause_msg is not None or action_msg is not None:
self.missing_arc_fragments[(start, end)].append((missing_cause_msg, action_msg))

def nearest_blocks(self) -> Iterable[Block]:
"""Yield the blocks in nearest-to-farthest order."""
Expand Down
8 changes: 8 additions & 0 deletions coverage/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,14 @@ def missing_arc_description(
"""
return f"Line {start} didn't jump to line {end}"

def arc_description(
self,
start: TLineNo, # pylint: disable=unused-argument
end: TLineNo
) -> str:
"""Provide an English description of an arc's effect."""
return f"jump to line {end}"

def source_token_lines(self) -> TSourceTokenLines:
"""Generate a series of tokenized lines, one for each line in `source`.

Expand Down
7 changes: 7 additions & 0 deletions coverage/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,13 @@ def missing_arc_description(
) -> str:
return self.parser.missing_arc_description(start, end)

def arc_description(
self,
start: TLineNo,
end: TLineNo
) -> str:
return self.parser.arc_description(start, end)

def source(self) -> str:
if self._source is None:
self._source = get_python_source(self.filename)
Expand Down
Loading