From d69513e3a0fc83fa4589ab485c1d3fcb9408fd1b Mon Sep 17 00:00:00 2001
From: Simon Cozens <simon@simon-cozens.org>
Date: Mon, 2 Sep 2024 17:41:34 +0100
Subject: [PATCH] Describe how to fix; show unique fails/fixes; report
 nearly-supported languages

---
 Lib/shaperglot/checks/no_orphaned_marks.py  | 32 +++++++--
 Lib/shaperglot/checks/orthographies.py      | 28 +++++++-
 Lib/shaperglot/checks/shaping_differs.py    | 29 ++++++--
 Lib/shaperglot/checks/unencoded_variants.py | 40 +++++++++--
 Lib/shaperglot/cli/__init__.py              | 12 ++++
 Lib/shaperglot/cli/check.py                 | 23 +++++-
 Lib/shaperglot/cli/report.py                | 77 ++++++++-------------
 Lib/shaperglot/reporter.py                  | 44 ++++++++++--
 8 files changed, 210 insertions(+), 75 deletions(-)

diff --git a/Lib/shaperglot/checks/no_orphaned_marks.py b/Lib/shaperglot/checks/no_orphaned_marks.py
index e443ac2..4bd05be 100644
--- a/Lib/shaperglot/checks/no_orphaned_marks.py
+++ b/Lib/shaperglot/checks/no_orphaned_marks.py
@@ -47,13 +47,18 @@ def execute(self, checker) -> None:
                 # good to hear specifically what glyphs we tried to test here.
                 if not checker.lang.get("exemplarChars"):
                     message += " when shaping " + self.input.describe()
-
-                checker.results.fail(
-                    check_name="no-orphaned-marks",
-                    result_code="notdef-produced",
-                    message=message,
-                    context={"text": self.input.check_yaml},
-                )
+                    checker.results.fail(
+                        check_name="no-orphaned-marks",
+                        result_code="notdef-produced",
+                        message=message,
+                        context={"text": self.input.check_yaml},
+                        fixes=[
+                            {
+                                "type": "add_codepoint",
+                                "thing": self.input.text[info.cluster],
+                            },
+                        ],
+                    )
                 break
             if _simple_mark_check(checker.codepoint_for(glyphname)):
                 # Was the previous glyph dotted circle?
@@ -65,6 +70,13 @@ def execute(self, checker) -> None:
                         message="Shaper produced a dotted circle when shaping "
                         + self.input.describe(),
                         context={"text": self.input.check_yaml},
+                        fixes=[
+                            {
+                                "type": "add_feature",
+                                "thing": "to avoid a dotted circle shaping "
+                                + self.input.describe(),
+                            },
+                        ],
                     )
                 elif pos.x_offset == 0 and pos.y_offset == 0:  # Suspicious
                     passed = False
@@ -77,6 +89,12 @@ def execute(self, checker) -> None:
                             "mark": glyphname,
                             "base": previous,
                         },
+                        fixes=[
+                            {
+                                "type": "add_anchor",
+                                "thing": f"{previous}/{glyphname}",
+                            },
+                        ],
                     )
             previous = glyphname
         if passed:
diff --git a/Lib/shaperglot/checks/orthographies.py b/Lib/shaperglot/checks/orthographies.py
index cf57e79..0ffd014 100644
--- a/Lib/shaperglot/checks/orthographies.py
+++ b/Lib/shaperglot/checks/orthographies.py
@@ -1,4 +1,5 @@
 import re
+from typing import Optional
 
 from strictyaml import Map
 
@@ -33,10 +34,10 @@ def __init__(self, lang) -> None:
         self.bases = set(bases) - self.marks
         self.aux = set(aux) - self.marks
 
-    def should_skip(self, checker) -> bool:
-        return False
+    def should_skip(self, _checker) -> Optional[str]:
+        return
 
-    def describe(self):
+    def describe(self) -> str:
         return "that the following glyphs are in the font: " + and_join(
             f"'{g}'" for g in self.all_glyphs
         )
@@ -56,6 +57,13 @@ def execute(self, checker) -> None:
                 result_code="bases-missing",
                 message=f"Some base glyphs were missing: {', '.join(missing)}",
                 context={"glyphs": missing},
+                fixes=[
+                    {
+                        "type": "add_codepoint",
+                        "thing": g,
+                    }
+                    for g in missing
+                ],
             )
         else:
             checker.results.okay(
@@ -74,6 +82,13 @@ def execute(self, checker) -> None:
                 result_code="marks-missing",
                 message=f"Some mark glyphs were missing: {missing_str}",
                 context={"glyphs": missing},
+                fixes=[
+                    {
+                        "type": "add_codepoint",
+                        "thing": g,
+                    }
+                    for g in missing
+                ],
             )
         else:
             checker.results.okay(
@@ -91,6 +106,13 @@ def execute(self, checker) -> None:
                 result_code="aux-missing",
                 message=f"Some auxiliary glyphs were missing: {', '.join(missing)}",
                 context={"glyphs": missing},
+                fixes=[
+                    {
+                        "type": "add_codepoint",
+                        "thing": g,
+                    }
+                    for g in missing
+                ],
             )
         else:
             checker.results.okay(
diff --git a/Lib/shaperglot/checks/shaping_differs.py b/Lib/shaperglot/checks/shaping_differs.py
index 22d5f2e..a1e695a 100644
--- a/Lib/shaperglot/checks/shaping_differs.py
+++ b/Lib/shaperglot/checks/shaping_differs.py
@@ -41,18 +41,17 @@ def execute(self, checker) -> None:
         # won't mean anything
         reported_missing = set()
         for result in checker.results:
-            if (
-                result.result_code == "bases-missing"
-                or result.result_code == "marks-missing"
-            ):
+            if result.result_code in ["bases-missing", "marks-missing"]:
                 reported_missing.update(result.context["glyphs"])
         for text in self.inputs:
             missing_glyphs = [f"'{g}'" for g in text.text if g in reported_missing]
             if missing_glyphs:
                 checker.results.skip(
                     check_name="shaping-differs",
-                    message="Differs check could not run because some characters (%s) were missing from the font."
-                    % ", ".join(missing_glyphs),
+                    message=(
+                        "Differs check could not run because some characters"
+                        f" ({', '.join(missing_glyphs)}) were missing from the font."
+                    ),
                 )
                 return
 
@@ -77,6 +76,12 @@ def execute(self, checker) -> None:
                         "input1": self.inputs[0].check_yaml,
                         "input2": self.inputs[0].check_yaml,
                     },
+                    fixes=[
+                        {
+                            "type": "add_feature",
+                            "thing": "A rule so " + self.describe(),
+                        }
+                    ],
                 )
             return
         # We are looking for a specific difference
@@ -95,6 +100,12 @@ def execute(self, checker) -> None:
                         "input1": self.inputs[0].check_yaml,
                         "input2": self.inputs[0].check_yaml,
                     },
+                    fixes=[
+                        {
+                            "type": "add_feature",
+                            "thing": "A rule such " + self.describe(),
+                        }
+                    ],
                 )
                 return
             glyphs.append((buffer[glyph_ix][0].codepoint, buffer[glyph_ix][1]))
@@ -115,4 +126,10 @@ def execute(self, checker) -> None:
                     "input1": self.inputs[0].check_yaml,
                     "input2": self.inputs[0].check_yaml,
                 },
+                fixes=[
+                    {
+                        "type": "add_feature",
+                        "thing": "A rule such " + self.describe(),
+                    }
+                ],
             )
diff --git a/Lib/shaperglot/checks/unencoded_variants.py b/Lib/shaperglot/checks/unencoded_variants.py
index 240580c..1004ace 100644
--- a/Lib/shaperglot/checks/unencoded_variants.py
+++ b/Lib/shaperglot/checks/unencoded_variants.py
@@ -10,32 +10,58 @@ class UnencodedVariantsCheck(ShaperglotCheck):
     )
 
     def describe(self) -> str:
-        return f"that, when {self.input.describe()}, an unencoded variant glyph is substituted used the `locl` feature"
+        return (
+            f"that, when {self.input.describe()}, an unencoded variant glyph "
+            "is substituted used the `locl` feature"
+        )
 
     def execute(self, checker) -> None:
         if len(self.input.text) > 1:
             raise ValueError(
-                f"Please only pass one codepoint at a time to the unencoded variants check (not '{self.input.text}')"
+                "Please only pass one codepoint at a time to the unencoded "
+                f"variants check (not '{self.input.text}')"
             )
         self.input.features["locl"] = False
         buffer = self.input.shape(checker)
+        if buffer.glyph_infos[0].codepoint == 0:
+            checker.results.fail(
+                check_name="unencoded-variants",
+                result_code="notdef-produced",
+                message="Shaper produced a .notdef",
+                context={"text": self.input.check_yaml},
+                fixes=[
+                    {
+                        "type": "add_codepoint",
+                        "thing": self.input.text[buffer.glyph_infos[0].cluster],
+                    },
+                ],
+            )
+            return
         glyphname = checker.glyphorder[buffer.glyph_infos[0].codepoint]
         # Are there variant versions of this glyph?
         variants = [
             glyph for glyph in checker.glyphorder if glyph.startswith(glyphname + ".")
         ]
+
+        if not self.input.language:
+            self.input.language = checker.lang["language"]
+
         if not variants:
             checker.results.warn(
                 check_name="unencoded-variants",
                 result_code="no-variant",
                 message="No variant glyphs were found for " + glyphname,
                 context={"text": self.input.check_yaml, "glyph": glyphname},
+                fixes=[
+                    {
+                        "type": "add_glyph",
+                        "thing": glyphname + ".locl" + self.input.language.upper(),
+                    }
+                ],
             )
             return
         # Try it again with locl on, set the language to the one we're
         # looking for see if something happens.
-        if not self.input.language:
-            self.input.language = checker.lang["language"]
         self.input.features["locl"] = True
         buffer2 = self.input.shape(checker)
         glyphname2 = checker.glyphorder[buffer2.glyph_infos[0].codepoint]
@@ -45,6 +71,12 @@ def execute(self, checker) -> None:
                 result_code="unchanged-after-locl",
                 message=f"The locl feature did not affect {glyphname}",
                 context={"text": self.input.check_yaml, "glyph": glyphname},
+                fixes=[
+                    {
+                        "type": "add_feature",
+                        "thing": f"a locale rule to substitute {glyphname} with a variant glyph",
+                    }
+                ],
             )
         else:
             checker.results.okay(
diff --git a/Lib/shaperglot/cli/__init__.py b/Lib/shaperglot/cli/__init__.py
index bfa732e..332a187 100644
--- a/Lib/shaperglot/cli/__init__.py
+++ b/Lib/shaperglot/cli/__init__.py
@@ -22,6 +22,12 @@ def main(args=None) -> None:
 
     parser_check = subparsers.add_parser('check', help=check.__doc__)
     parser_check.add_argument('--verbose', '-v', action='count')
+    parser_check.add_argument(
+        '--nearly',
+        type=int,
+        help="Number of fixes left to be considered nearly supported",
+        default=5,
+    )
     parser_check.add_argument('font', metavar='FONT', help='the font file')
     parser_check.add_argument(
         'lang', metavar='LANG', help='one or more ISO639-3 language codes', nargs="+"
@@ -31,6 +37,12 @@ def main(args=None) -> None:
     parser_report = subparsers.add_parser('report', help=report.__doc__)
     parser_report.add_argument('font', metavar='FONT', help='the font file')
     parser_report.add_argument('--verbose', '-v', action='count')
+    parser_report.add_argument(
+        '--nearly',
+        type=int,
+        help="Number of fixes left to be considered nearly supported",
+        default=5,
+    )
     parser_report.add_argument('--csv', action='store_true', help="Output as CSV")
     parser_report.add_argument(
         '--group', action='store_true', help="Group by success/failure"
diff --git a/Lib/shaperglot/cli/check.py b/Lib/shaperglot/cli/check.py
index 6684c10..73a40c5 100644
--- a/Lib/shaperglot/cli/check.py
+++ b/Lib/shaperglot/cli/check.py
@@ -1,8 +1,12 @@
+from collections import defaultdict
+from typing import Optional
+
 from shaperglot.checker import Checker
 from shaperglot.languages import Languages
+from shaperglot.reporter import Reporter
 
 
-def find_lang(lang, langs):
+def find_lang(lang: str, langs: Languages) -> Optional[str]:
     # Find the language in the languages list; could be by ID, by name, etc.
     if lang in langs:
         return lang
@@ -15,12 +19,14 @@ def find_lang(lang, langs):
             or lang_info.get("autonym", "").lower() == lang.lower()
         ):
             return lang_id
+    return None
 
 
 def check(options) -> None:
     """Check a particular language or languages are supported"""
     checker = Checker(options.font)
     langs = Languages()
+    fixes_needed = defaultdict(set)
     for orig_lang in options.lang:
         lang = find_lang(orig_lang, langs)
         if not lang:
@@ -31,6 +37,10 @@ def check(options) -> None:
 
         if results.is_unknown:
             print(f"Cannot determine whether font supports language '{lang}'")
+        elif results.is_nearly_success(options.nearly):
+            print(f"Font nearly supports language '{lang}'")
+            for fixtype, things in results.unique_fixes().items():
+                fixes_needed[fixtype].update(things)
         elif results.is_success:
             print(f"Font supports language '{lang}'")
         else:
@@ -42,3 +52,14 @@ def check(options) -> None:
         elif options.verbose or not results.is_success:
             for message in results.fails:
                 print(f" * {message}")
+
+    if fixes_needed:
+        show_how_to_fix(fixes_needed)
+
+
+def show_how_to_fix(fixes: Reporter):
+    print("\nTo add full support to nearly-supported languages:")
+    for category, fixes in fixes.items():
+        plural = "s" if len(fixes) > 1 else ""
+        print(f" * {category.replace("_", ' ').capitalize()}{plural}: ", end="")
+        print("; ".join(sorted(fixes)))
diff --git a/Lib/shaperglot/cli/report.py b/Lib/shaperglot/cli/report.py
index 22d8a0c..fdbb4fb 100644
--- a/Lib/shaperglot/cli/report.py
+++ b/Lib/shaperglot/cli/report.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import os
 import re
 from textwrap import fill
@@ -13,8 +14,10 @@ def report(options) -> None:
     checker = Checker(options.font)
     langs = Languages()
     messages = []
+    nearly = []
     supported = []
     unsupported = []
+    fixes_needed = defaultdict(set)
 
     if options.csv:
         print(
@@ -36,18 +39,26 @@ def report(options) -> None:
         if results.is_success:
             supported.append(lang)
             msg = "supports"
+        elif results.is_nearly_success(options.nearly):
+            nearly.append(lang)
+            msg = "nearly supports"
         else:
             unsupported.append(lang)
             msg = "does not fully support"
+
+        for fixtype, things in results.unique_fixes().items():
+            fixes_needed[fixtype].update(things)
         if options.group:
             continue
         print(f"Font {msg} language '{lang}' ({langs[lang]['name']})")
 
-        messages.extend(results)
         if options.verbose and options.verbose > 1:
             for status, message in results:
                 print(f" * {status.value}: {message}")
 
+    if options.csv:
+        return
+
     if options.group:
         if supported:
             print("Supported languages")
@@ -55,6 +66,12 @@ def report(options) -> None:
         for lang in supported:
             print(f"Font supports language '{lang}' ({langs[lang]['name']})")
 
+        if nearly:
+            print("\nNearly supported languages")
+            print("===================\n")
+        for lang in nearly:
+            print(f"Font nearly supports language '{lang}' ({langs[lang]['name']})")
+
         if unsupported:
             print("\nUnsupported languages")
             print("====================\n")
@@ -63,22 +80,21 @@ def report(options) -> None:
                 f"Font does not fully support language '{lang}' ({langs[lang]['name']})"
             )
     # Collate a useful fixing guide
-    if options.csv:
-        return
-
-    short_summary(supported, unsupported)
+    short_summary(supported, nearly, unsupported)
     if options.verbose:
-        long_summary(messages, unsupported)
+        long_summary(fixes_needed, unsupported)
 
 
-def short_summary(supported, unsupported) -> None:
+def short_summary(supported, nearly, unsupported) -> None:
     print("\n== Summary ==\n")
-    print(f"* {len(supported)+len(unsupported)} languages checked")
+    print(f"* {len(supported)+len(nearly)+len(unsupported)} languages checked")
     if supported:
         print(f"* {len(supported)} languages supported")
+    if nearly:
+        print(f"* {len(supported)} languages nearly supported")
 
 
-def long_summary(messages, unsupported) -> None:
+def long_summary(fixes_needed, unsupported) -> None:
     if unsupported:
         print(
             fill(
@@ -88,44 +104,11 @@ def long_summary(messages, unsupported) -> None:
             )
         )
         print("\nTo add support:")
-    missing_bases = set()
-    missing_marks = set()
-    missing_anchors = set()
-    for msg in messages:
-        if msg.result_code == "bases-missing":
-            missing_bases |= set(msg.context["glyphs"])
-        if msg.result_code == "marks-missing":
-            missing_marks |= set(msg.context["glyphs"])
-        if msg.result_code == "orphaned-mark":
-            missing_anchors.add((msg.context["base"], msg.context["mark"]))
-    if missing_marks:
-        print(
-            fill(
-                " * Add mark glyphs: "
-                + ", ".join(["\u25cc" + x for x in sorted(missing_marks)]),
-                subsequent_indent="    ",
-                width=os.get_terminal_size()[0] - 2,
-            )
-        )
-    if missing_bases:
-        print(
-            fill(
-                " * Support characters: " + ", ".join(sorted(missing_bases)),
-                subsequent_indent="    ",
-                width=os.get_terminal_size()[0] - 2,
-            )
-        )
-    if missing_anchors:
-        print(
-            fill(
-                " * Add anchor attachments: "
-                + ", ".join(
-                    [base + '/' + mark for base, mark in sorted(missing_anchors)]
-                ),
-                subsequent_indent="    ",
-                width=os.get_terminal_size()[0] - 2,
-            )
-        )
+    for category, fixes in fixes_needed.items():
+        plural = "s" if len(fixes) > 1 else ""
+        print(f" * {category.replace("_", ' ').capitalize()}{plural}: ")
+        for fix in sorted(fixes):
+            print("    - " + fix)
 
 
 def report_csv(langcode, lang, results: Iterable[Result]) -> None:
diff --git a/Lib/shaperglot/reporter.py b/Lib/shaperglot/reporter.py
index 89e55ca..4a9e4e8 100644
--- a/Lib/shaperglot/reporter.py
+++ b/Lib/shaperglot/reporter.py
@@ -1,6 +1,9 @@
+from collections import defaultdict
 from collections.abc import Sequence
 from dataclasses import dataclass, field
 from enum import Enum
+import json
+from typing import Dict
 
 from termcolor import colored
 
@@ -19,32 +22,40 @@ class Message:
     message: str
     result_code: str = "ok"
     context: dict = field(default_factory=dict)
+    fixes: list = field(default_factory=list)
 
     def __repr__(self) -> str:
         return self.result.value + ": " + self.message
 
+    def __hash__(self) -> int:
+        hsh = hash(self.check_name + self.message + self.result.value)
+        return hsh
+
+    def __eq__(self, other) -> bool:
+        return self.check_name == other.check_name and self.message == other.message
+
 
 class Reporter(Sequence):
     def __init__(self) -> None:
-        self.results = []
+        self.results = {}
 
     def __getitem__(self, index):
-        return self.results[index]
+        return list(self.results.keys())[index]
 
     def __len__(self) -> int:
-        return len(self.results)
+        return len(self.results.keys())
 
     def okay(self, **kwargs) -> None:
-        self.results.append(Message(result=Result.PASS, **kwargs))
+        self.results[Message(result=Result.PASS, **kwargs)] = 1
 
     def warn(self, **kwargs) -> None:
-        self.results.append(Message(result=Result.WARN, **kwargs))
+        self.results[Message(result=Result.WARN, **kwargs)] = 1
 
     def fail(self, **kwargs) -> None:
-        self.results.append(Message(result=Result.FAIL, **kwargs))
+        self.results[Message(result=Result.FAIL, **kwargs)] = 1
 
     def skip(self, **kwargs) -> None:
-        self.results.append(Message(result=Result.SKIP, **kwargs))
+        self.results[Message(result=Result.SKIP, **kwargs)] = 1
 
     @property
     def is_unknown(self) -> bool:
@@ -54,6 +65,9 @@ def is_unknown(self) -> bool:
     def is_success(self) -> bool:
         return len(self.passes) > 0 and not self.fails
 
+    def is_nearly_success(self, nearly: int) -> bool:
+        return not self.is_success and self.count_fixes <= nearly
+
     @property
     def passes(self) -> list:
         return [x for x in self.results if x.result == Result.PASS]
@@ -65,3 +79,19 @@ def fails(self) -> list:
     @property
     def warns(self) -> list:
         return [x for x in self.results if x.result == Result.WARN]
+
+    def unique_fixes(self) -> Dict[str, list[str]]:
+        fixes = defaultdict(list)
+        for result in self.results:
+            for fix in result.fixes:
+                fixtype = fix["type"]
+                fixthing = fix["thing"]
+                if fixthing not in fixes[fixtype]:
+                    fixes[fixtype].append(fixthing)
+        return fixes
+
+    @property
+    def count_fixes(self) -> int:
+        """Return the number of fixes required to pass all checks"""
+        fixes = self.unique_fixes()
+        return sum([len(stuff) for stuff in fixes.values()])