nedbat · nedbat · Oct 2, 2024 · Sep 22, 2024 · Sep 22, 2024 · Sep 11, 2024
diff --git a/coverage/lcovreport.py b/coverage/lcovreport.py
@@ -32,6 +32,129 @@ def line_hash(line: str) -> str:
     return base64.b64encode(hashed).decode("ascii").rstrip("=")
 
 
+def lcov_lines(
+    analysis: Analysis,
+    lines: list[int],
+    source_lines: list[str],
+    outfile: IO[str],
+) -> None:
+    """Emit line coverage records for an analyzed file."""
+    hash_suffix = ""
+    for line in lines:
+        if source_lines:
+            hash_suffix = "," + line_hash(source_lines[line-1])
+        # Q: can we get info about the number of times a statement is
+        # executed?  If so, that should be recorded here.
+        hit = int(line not in analysis.missing)
+        outfile.write(f"DA:{line},{hit}{hash_suffix}\n")
+
+    if analysis.numbers.n_statements > 0:
+        outfile.write(f"LF:{analysis.numbers.n_statements}\n")
+        outfile.write(f"LH:{analysis.numbers.n_executed}\n")
+
+
+def lcov_functions(
+    fr: FileReporter,
+    file_analysis: Analysis,
+    outfile: IO[str],
+) -> None:
+    """Emit function coverage records for an analyzed file."""
+    # lcov 2.2 introduces a new format for function coverage records.
+    # We continue to generate the old format because we don't know what
+    # version of the lcov tools will be used to read this report.
+
+    # "and region.lines" below avoids a crash due to a bug in PyPy 3.8
+    # where, for whatever reason, when collecting data in --branch mode,
+    # top-level functions have an empty lines array.  Instead we just don't
+    # emit function records for those.
+
+    # suppressions because of https://github.com/pylint-dev/pylint/issues/9923
+    functions = [
+        (min(region.start, min(region.lines)), #pylint: disable=nested-min-max
+         max(region.start, max(region.lines)), #pylint: disable=nested-min-max
+         region)
+        for region in fr.code_regions()
+        if region.kind == "function" and region.lines
+    ]
+    if not functions:
+        return
+
+    functions.sort()
+    functions_hit = 0
+    for first_line, last_line, region in functions:
+        # A function counts as having been executed if any of it has been
+        # executed.
+        analysis = file_analysis.narrow(region.lines)
+        hit = int(analysis.numbers.n_executed > 0)
+        functions_hit += hit
+
+        outfile.write(f"FN:{first_line},{last_line},{region.name}\n")
+        outfile.write(f"FNDA:{hit},{region.name}\n")
+
+    outfile.write(f"FNF:{len(functions)}\n")
+    outfile.write(f"FNH:{functions_hit}\n")
+
+
+def lcov_arcs(
+    fr: FileReporter,
+    analysis: Analysis,
+    lines: list[int],
+    outfile: IO[str],
+) -> None:
+    """Emit branch coverage records for an analyzed file."""
+    branch_stats = analysis.branch_stats()
+    executed_arcs = analysis.executed_branch_arcs()
+    missing_arcs = analysis.missing_branch_arcs()
+
+    for line in lines:
+        if line not in branch_stats:
+            continue
+
+        # This is only one of several possible ways to map our sets of executed
+        # and not-executed arcs to BRDA codes.  It seems to produce reasonable
+        # results when fed through genhtml.
+        _, taken = branch_stats[line]
+
+        if taken == 0:
+            # When _none_ of the out arcs from 'line' were executed,
+            # this probably means 'line' was never executed at all.
+            # Cross-check with the line stats.
+            assert len(executed_arcs[line]) == 0
+            assert line in analysis.missing
+            destinations = [
+                (dst, "-") for dst in missing_arcs[line]
+            ]
+        else:
+            # Q: can we get counts of the number of times each arc was executed?
+            # branch_stats has "total" and "taken" counts for each branch,
+            # but it doesn't have "taken" broken down by destination.
+            destinations = [
+                (dst, "1") for dst in executed_arcs[line]
+            ]
+            destinations.extend(
+                (dst, "0") for dst in missing_arcs[line]
+            )
+
+        # Sort exit arcs after normal arcs.  Exit arcs typically come from
+        # an if statement, at the end of a function, with no else clause.
+        # This structure reads like you're jumping to the end of the function
+        # when the conditional expression is false, so it should be presented
+        # as the second alternative for the branch, after the alternative that
+        # enters the if clause.
+        destinations.sort(key=lambda d: (d[0] < 0, d))
+
+        for dst, hit in destinations:
+            branch = fr.arc_description(line, dst)
+            outfile.write(f"BRDA:{line},0,{branch},{hit}\n")
+
+    # Summary of the branch coverage.
+    brf = sum(t for t, k in branch_stats.values())
+    brh = brf - sum(t - k for t, k in branch_stats.values())
+    if brf > 0:
+        outfile.write(f"BRF:{brf}\n")
+        outfile.write(f"BRH:{brh}\n")
+
+
 class LcovReporter:
     """A reporter for writing LCOV coverage reports."""
 
@@ -85,72 +208,15 @@ def lcov_file(
 
         outfile.write(f"SF:{rel_fname}\n")
 
+        lines = sorted(analysis.statements)
         if self.config.lcov_line_checksums:
             source_lines = fr.source().splitlines()
+        else:
+            source_lines = []
 
-        # Emit a DA: record for each line of the file.
-        lines = sorted(analysis.statements)
-        hash_suffix = ""
-        for line in lines:
-            if self.config.lcov_line_checksums:
-                hash_suffix = "," + line_hash(source_lines[line-1])
-            # Q: can we get info about the number of times a statement is
-            # executed?  If so, that should be recorded here.
-            hit = int(line not in analysis.missing)
-            outfile.write(f"DA:{line},{hit}{hash_suffix}\n")
-
-        if analysis.numbers.n_statements > 0:
-            outfile.write(f"LF:{analysis.numbers.n_statements}\n")
-            outfile.write(f"LH:{analysis.numbers.n_executed}\n")
-
-        # More information dense branch coverage data, if available.
+        lcov_lines(analysis, lines, source_lines, outfile)
+        lcov_functions(fr, analysis, outfile)
         if analysis.has_arcs:
-            branch_stats = analysis.branch_stats()
-            executed_arcs = analysis.executed_branch_arcs()
-            missing_arcs = analysis.missing_branch_arcs()
-
-            for line in lines:
-                if line in branch_stats:
-                    # The meaning of a BRDA: line is not well explained in the lcov
-                    # documentation.  Based on what genhtml does with them, however,
-                    # the interpretation is supposed to be something like this:
-                    # BRDA: <line>, <block>, <branch>, <hit>
-                    # where <line> is the source line number of the *origin* of the
-                    # branch; <block> is an arbitrary number which distinguishes multiple
-                    # control flow operations on a single line; <branch> is an arbitrary
-                    # number which distinguishes the possible destinations of the specific
-                    # control flow operation identified by <line> + <block>; and <hit> is
-                    # either the hit count for <line> + <block> + <branch> or "-" meaning
-                    # that <line> + <block> was never *reached*.  <line> must be >= 1,
-                    # and <block>, <branch>, <hit> must be >= 0.
-
-                    # This is only one possible way to map our sets of executed and
-                    # not-executed arcs to BRDA codes. It seems to produce reasonable
-                    # results when fed through genhtml.
-
-                    # Q: can we get counts of the number of times each arc was executed?
-                    # branch_stats has "total" and "taken" counts for each branch, but it
-                    # doesn't have "taken" broken down by destination.
-                    destinations = {}
-                    for dst in executed_arcs[line]:
-                        destinations[(int(dst < 0), abs(dst))] = 1
-                    for dst in missing_arcs[line]:
-                        destinations[(int(dst < 0), abs(dst))] = 0
-
-                    if all(v == 0 for v in destinations.values()):
-                        # When _none_ of the out arcs from 'line' were executed, presume
-                        # 'line' was never reached.
-                        for branch, _ in enumerate(sorted(destinations.keys())):
-                            outfile.write(f"BRDA:{line},0,{branch},-\n")
-                    else:
-                        for branch, (_, hit) in enumerate(sorted(destinations.items())):
-                            outfile.write(f"BRDA:{line},0,{branch},{hit}\n")
-
-            # Summary of the branch coverage.
-            brf = sum(t for t, k in branch_stats.values())
-            brh = brf - sum(t - k for t, k in branch_stats.values())
-            if brf > 0:
-                outfile.write(f"BRF:{brf}\n")
-                outfile.write(f"BRH:{brh}\n")
+            lcov_arcs(fr, analysis, lines, outfile)
 
         outfile.write("end_of_record\n")
diff --git a/coverage/parser.py b/coverage/parser.py
@@ -346,32 +346,45 @@ def exit_counts(self) -> dict[TLineNo, int]:
 
         return exit_counts
 
+    def _finish_action_msg(self, action_msg: str | None, end: TLineNo) -> str:
+        """Apply some defaulting and formatting to an arc's description."""
+        if action_msg is None:
+            if end < 0:
+                action_msg = "jump to the function exit"
+            else:
+                action_msg = "jump to line {lineno}"
+        action_msg = action_msg.format(lineno=end)
+        return action_msg
+
     def missing_arc_description(self, start: TLineNo, end: TLineNo) -> str:
         """Provide an English sentence describing a missing arc."""
         if self._missing_arc_fragments is None:
             self._analyze_ast()
             assert self._missing_arc_fragments is not None
 
-        actual_start = start
         fragment_pairs = self._missing_arc_fragments.get((start, end), [(None, None)])
 
         msgs = []
-        for smsg, emsg in fragment_pairs:
-            if emsg is None:
-                if end < 0:
-                    emsg = "didn't jump to the function exit"
-                else:
-                    emsg = "didn't jump to line {lineno}"
-            emsg = emsg.format(lineno=end)
-
-            msg = f"line {actual_start} {emsg}"
-            if smsg is not None:
-                msg += f" because {smsg.format(lineno=actual_start)}"
+        for missing_cause_msg, action_msg in fragment_pairs:
+            action_msg = self._finish_action_msg(action_msg, end)
+            msg = f"line {start} didn't {action_msg}"
+            if missing_cause_msg is not None:
+                msg += f" because {missing_cause_msg.format(lineno=start)}"
 
             msgs.append(msg)
 
         return " or ".join(msgs)
 
+    def arc_description(self, start: TLineNo, end: TLineNo) -> str:
+        """Provide an English description of an arc's effect."""
+        if self._missing_arc_fragments is None:
+            self._analyze_ast()
+            assert self._missing_arc_fragments is not None
+
+        fragment_pairs = self._missing_arc_fragments.get((start, end), [(None, None)])
+        action_msg = self._finish_action_msg(fragment_pairs[0][1], end)
+        return action_msg
+
 
 class ByteParser:
     """Parse bytecode to understand the structure of code."""
@@ -452,7 +465,7 @@ class ArcStart:
 
     `lineno` is the line number the arc starts from.
 
-    `cause` is an English text fragment used as the `startmsg` for
+    `cause` is an English text fragment used as the `missing_cause_msg` for
     AstArcAnalyzer.missing_arc_fragments.  It will be used to describe why an
     arc wasn't executed, so should fit well into a sentence of the form,
     "Line 17 didn't run because {cause}."  The fragment can include "{lineno}"
@@ -486,10 +499,21 @@ def __call__(
         self,
         start: TLineNo,
         end: TLineNo,
-        smsg: str | None = None,
-        emsg: str | None = None,
+        missing_cause_msg: str | None = None,
+        action_msg: str | None = None,
     ) -> None:
-        ...
+        """
+        Record an arc from `start` to `end`.
+
+        `missing_cause_msg` is a description of the reason the arc wasn't
+        taken if it wasn't taken.  For example, "the condition on line 10 was
+        never true."
+
+        `action_msg` is a description of what the arc does, like "jump to line
+        10" or "exit from function 'fooey'."
+
+        """
+
 
 TArcFragments = Dict[TArc, List[Tuple[Optional[str], Optional[str]]]]
 
@@ -550,15 +574,15 @@ def process_raise_exits(self, exits: set[ArcStart], add_arc: TAddArcFn) -> bool:
         for xit in exits:
             add_arc(
                 xit.lineno, -self.start, xit.cause,
-                f"didn't except from function {self.name!r}",
+                f"except from function {self.name!r}",
             )
         return True
 
     def process_return_exits(self, exits: set[ArcStart], add_arc: TAddArcFn) -> bool:
         for xit in exits:
             add_arc(
                 xit.lineno, -self.start, xit.cause,
-                f"didn't return from function {self.name!r}",
+                f"return from function {self.name!r}",
             )
         return True
 
@@ -602,10 +626,10 @@ class AstArcAnalyzer:
     `missing_arc_fragments`: a dict mapping (from, to) arcs to lists of
     message fragments explaining why the arc is missing from execution::
 
-        { (start, end): [(startmsg, endmsg), ...], }
+        { (start, end): [(missing_cause_msg, action_msg), ...], }
 
     For an arc starting from line 17, they should be usable to form complete
-    sentences like: "Line 17 {endmsg} because {startmsg}".
+    sentences like: "Line 17 didn't {action_msg} because {missing_cause_msg}".
 
     NOTE: Starting in July 2024, I've been whittling this down to only report
     arc that are part of true branches.  It's not clear how far this work will
@@ -716,30 +740,27 @@ def _code_object__FunctionDef(self, node: ast.FunctionDef) -> None:
 
     def _code_object__ClassDef(self, node: ast.ClassDef) -> None:
         start = self.line_for_node(node)
-        exits = self.process_body(node.body)#, from_start=ArcStart(start))
+        exits = self.process_body(node.body)
         for xit in exits:
-            self.add_arc(
-                xit.lineno, -start, xit.cause,
-                f"didn't exit the body of class {node.name!r}",
-            )
+            self.add_arc(xit.lineno, -start, xit.cause, f"exit class {node.name!r}")
 
     def add_arc(
         self,
         start: TLineNo,
         end: TLineNo,
-        smsg: str | None = None,
-        emsg: str | None = None,
+        missing_cause_msg: str | None = None,
+        action_msg: str | None = None,
     ) -> None:
         """Add an arc, including message fragments to use if it is missing."""
         if self.debug:                      # pragma: debugging
-            print(f"Adding possible arc: ({start}, {end}): {smsg!r}, {emsg!r}")
+            print(f"Adding possible arc: ({start}, {end}): {missing_cause_msg!r}, {action_msg!r}")
             print(short_stack(), end="\n\n")
         self.arcs.add((start, end))
         if start in self.current_with_starts:
             self.with_entries.add((start, end))
 
-        if smsg is not None or emsg is not None:
-            self.missing_arc_fragments[(start, end)].append((smsg, emsg))
+        if missing_cause_msg is not None or action_msg is not None:
+            self.missing_arc_fragments[(start, end)].append((missing_cause_msg, action_msg))
 
     def nearest_blocks(self) -> Iterable[Block]:
         """Yield the blocks in nearest-to-farthest order."""

diff --git a/coverage/plugin.py b/coverage/plugin.py
@@ -542,6 +542,14 @@ def missing_arc_description(
         """
         return f"Line {start} didn't jump to line {end}"
 
+    def arc_description(
+        self,
+        start: TLineNo,     # pylint: disable=unused-argument
+        end: TLineNo
+    ) -> str:
+        """Provide an English description of an arc's effect."""
+        return f"jump to line {end}"
+
     def source_token_lines(self) -> TSourceTokenLines:
         """Generate a series of tokenized lines, one for each line in `source`.
 

diff --git a/coverage/python.py b/coverage/python.py
@@ -227,6 +227,13 @@ def missing_arc_description(
     ) -> str:
         return self.parser.missing_arc_description(start, end)
 
+    def arc_description(
+        self,
+        start: TLineNo,
+        end: TLineNo
+    ) -> str:
+        return self.parser.arc_description(start, end)
+
     def source(self) -> str:
         if self._source is None:
             self._source = get_python_source(self.filename)