From d69513e3a0fc83fa4589ab485c1d3fcb9408fd1b Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Mon, 2 Sep 2024 17:41:34 +0100 Subject: [PATCH] Describe how to fix; show unique fails/fixes; report nearly-supported languages --- Lib/shaperglot/checks/no_orphaned_marks.py | 32 +++++++-- Lib/shaperglot/checks/orthographies.py | 28 +++++++- Lib/shaperglot/checks/shaping_differs.py | 29 ++++++-- Lib/shaperglot/checks/unencoded_variants.py | 40 +++++++++-- Lib/shaperglot/cli/__init__.py | 12 ++++ Lib/shaperglot/cli/check.py | 23 +++++- Lib/shaperglot/cli/report.py | 77 ++++++++------------- Lib/shaperglot/reporter.py | 44 ++++++++++-- 8 files changed, 210 insertions(+), 75 deletions(-) diff --git a/Lib/shaperglot/checks/no_orphaned_marks.py b/Lib/shaperglot/checks/no_orphaned_marks.py index e443ac2..4bd05be 100644 --- a/Lib/shaperglot/checks/no_orphaned_marks.py +++ b/Lib/shaperglot/checks/no_orphaned_marks.py @@ -47,13 +47,18 @@ def execute(self, checker) -> None: # good to hear specifically what glyphs we tried to test here. if not checker.lang.get("exemplarChars"): message += " when shaping " + self.input.describe() - - checker.results.fail( - check_name="no-orphaned-marks", - result_code="notdef-produced", - message=message, - context={"text": self.input.check_yaml}, - ) + checker.results.fail( + check_name="no-orphaned-marks", + result_code="notdef-produced", + message=message, + context={"text": self.input.check_yaml}, + fixes=[ + { + "type": "add_codepoint", + "thing": self.input.text[info.cluster], + }, + ], + ) break if _simple_mark_check(checker.codepoint_for(glyphname)): # Was the previous glyph dotted circle? @@ -65,6 +70,13 @@ def execute(self, checker) -> None: message="Shaper produced a dotted circle when shaping " + self.input.describe(), context={"text": self.input.check_yaml}, + fixes=[ + { + "type": "add_feature", + "thing": "to avoid a dotted circle shaping " + + self.input.describe(), + }, + ], ) elif pos.x_offset == 0 and pos.y_offset == 0: # Suspicious passed = False @@ -77,6 +89,12 @@ def execute(self, checker) -> None: "mark": glyphname, "base": previous, }, + fixes=[ + { + "type": "add_anchor", + "thing": f"{previous}/{glyphname}", + }, + ], ) previous = glyphname if passed: diff --git a/Lib/shaperglot/checks/orthographies.py b/Lib/shaperglot/checks/orthographies.py index cf57e79..0ffd014 100644 --- a/Lib/shaperglot/checks/orthographies.py +++ b/Lib/shaperglot/checks/orthographies.py @@ -1,4 +1,5 @@ import re +from typing import Optional from strictyaml import Map @@ -33,10 +34,10 @@ def __init__(self, lang) -> None: self.bases = set(bases) - self.marks self.aux = set(aux) - self.marks - def should_skip(self, checker) -> bool: - return False + def should_skip(self, _checker) -> Optional[str]: + return - def describe(self): + def describe(self) -> str: return "that the following glyphs are in the font: " + and_join( f"'{g}'" for g in self.all_glyphs ) @@ -56,6 +57,13 @@ def execute(self, checker) -> None: result_code="bases-missing", message=f"Some base glyphs were missing: {', '.join(missing)}", context={"glyphs": missing}, + fixes=[ + { + "type": "add_codepoint", + "thing": g, + } + for g in missing + ], ) else: checker.results.okay( @@ -74,6 +82,13 @@ def execute(self, checker) -> None: result_code="marks-missing", message=f"Some mark glyphs were missing: {missing_str}", context={"glyphs": missing}, + fixes=[ + { + "type": "add_codepoint", + "thing": g, + } + for g in missing + ], ) else: checker.results.okay( @@ -91,6 +106,13 @@ def execute(self, checker) -> None: result_code="aux-missing", message=f"Some auxiliary glyphs were missing: {', '.join(missing)}", context={"glyphs": missing}, + fixes=[ + { + "type": "add_codepoint", + "thing": g, + } + for g in missing + ], ) else: checker.results.okay( diff --git a/Lib/shaperglot/checks/shaping_differs.py b/Lib/shaperglot/checks/shaping_differs.py index 22d5f2e..a1e695a 100644 --- a/Lib/shaperglot/checks/shaping_differs.py +++ b/Lib/shaperglot/checks/shaping_differs.py @@ -41,18 +41,17 @@ def execute(self, checker) -> None: # won't mean anything reported_missing = set() for result in checker.results: - if ( - result.result_code == "bases-missing" - or result.result_code == "marks-missing" - ): + if result.result_code in ["bases-missing", "marks-missing"]: reported_missing.update(result.context["glyphs"]) for text in self.inputs: missing_glyphs = [f"'{g}'" for g in text.text if g in reported_missing] if missing_glyphs: checker.results.skip( check_name="shaping-differs", - message="Differs check could not run because some characters (%s) were missing from the font." - % ", ".join(missing_glyphs), + message=( + "Differs check could not run because some characters" + f" ({', '.join(missing_glyphs)}) were missing from the font." + ), ) return @@ -77,6 +76,12 @@ def execute(self, checker) -> None: "input1": self.inputs[0].check_yaml, "input2": self.inputs[0].check_yaml, }, + fixes=[ + { + "type": "add_feature", + "thing": "A rule so " + self.describe(), + } + ], ) return # We are looking for a specific difference @@ -95,6 +100,12 @@ def execute(self, checker) -> None: "input1": self.inputs[0].check_yaml, "input2": self.inputs[0].check_yaml, }, + fixes=[ + { + "type": "add_feature", + "thing": "A rule such " + self.describe(), + } + ], ) return glyphs.append((buffer[glyph_ix][0].codepoint, buffer[glyph_ix][1])) @@ -115,4 +126,10 @@ def execute(self, checker) -> None: "input1": self.inputs[0].check_yaml, "input2": self.inputs[0].check_yaml, }, + fixes=[ + { + "type": "add_feature", + "thing": "A rule such " + self.describe(), + } + ], ) diff --git a/Lib/shaperglot/checks/unencoded_variants.py b/Lib/shaperglot/checks/unencoded_variants.py index 240580c..1004ace 100644 --- a/Lib/shaperglot/checks/unencoded_variants.py +++ b/Lib/shaperglot/checks/unencoded_variants.py @@ -10,32 +10,58 @@ class UnencodedVariantsCheck(ShaperglotCheck): ) def describe(self) -> str: - return f"that, when {self.input.describe()}, an unencoded variant glyph is substituted used the `locl` feature" + return ( + f"that, when {self.input.describe()}, an unencoded variant glyph " + "is substituted used the `locl` feature" + ) def execute(self, checker) -> None: if len(self.input.text) > 1: raise ValueError( - f"Please only pass one codepoint at a time to the unencoded variants check (not '{self.input.text}')" + "Please only pass one codepoint at a time to the unencoded " + f"variants check (not '{self.input.text}')" ) self.input.features["locl"] = False buffer = self.input.shape(checker) + if buffer.glyph_infos[0].codepoint == 0: + checker.results.fail( + check_name="unencoded-variants", + result_code="notdef-produced", + message="Shaper produced a .notdef", + context={"text": self.input.check_yaml}, + fixes=[ + { + "type": "add_codepoint", + "thing": self.input.text[buffer.glyph_infos[0].cluster], + }, + ], + ) + return glyphname = checker.glyphorder[buffer.glyph_infos[0].codepoint] # Are there variant versions of this glyph? variants = [ glyph for glyph in checker.glyphorder if glyph.startswith(glyphname + ".") ] + + if not self.input.language: + self.input.language = checker.lang["language"] + if not variants: checker.results.warn( check_name="unencoded-variants", result_code="no-variant", message="No variant glyphs were found for " + glyphname, context={"text": self.input.check_yaml, "glyph": glyphname}, + fixes=[ + { + "type": "add_glyph", + "thing": glyphname + ".locl" + self.input.language.upper(), + } + ], ) return # Try it again with locl on, set the language to the one we're # looking for see if something happens. - if not self.input.language: - self.input.language = checker.lang["language"] self.input.features["locl"] = True buffer2 = self.input.shape(checker) glyphname2 = checker.glyphorder[buffer2.glyph_infos[0].codepoint] @@ -45,6 +71,12 @@ def execute(self, checker) -> None: result_code="unchanged-after-locl", message=f"The locl feature did not affect {glyphname}", context={"text": self.input.check_yaml, "glyph": glyphname}, + fixes=[ + { + "type": "add_feature", + "thing": f"a locale rule to substitute {glyphname} with a variant glyph", + } + ], ) else: checker.results.okay( diff --git a/Lib/shaperglot/cli/__init__.py b/Lib/shaperglot/cli/__init__.py index bfa732e..332a187 100644 --- a/Lib/shaperglot/cli/__init__.py +++ b/Lib/shaperglot/cli/__init__.py @@ -22,6 +22,12 @@ def main(args=None) -> None: parser_check = subparsers.add_parser('check', help=check.__doc__) parser_check.add_argument('--verbose', '-v', action='count') + parser_check.add_argument( + '--nearly', + type=int, + help="Number of fixes left to be considered nearly supported", + default=5, + ) parser_check.add_argument('font', metavar='FONT', help='the font file') parser_check.add_argument( 'lang', metavar='LANG', help='one or more ISO639-3 language codes', nargs="+" @@ -31,6 +37,12 @@ def main(args=None) -> None: parser_report = subparsers.add_parser('report', help=report.__doc__) parser_report.add_argument('font', metavar='FONT', help='the font file') parser_report.add_argument('--verbose', '-v', action='count') + parser_report.add_argument( + '--nearly', + type=int, + help="Number of fixes left to be considered nearly supported", + default=5, + ) parser_report.add_argument('--csv', action='store_true', help="Output as CSV") parser_report.add_argument( '--group', action='store_true', help="Group by success/failure" diff --git a/Lib/shaperglot/cli/check.py b/Lib/shaperglot/cli/check.py index 6684c10..73a40c5 100644 --- a/Lib/shaperglot/cli/check.py +++ b/Lib/shaperglot/cli/check.py @@ -1,8 +1,12 @@ +from collections import defaultdict +from typing import Optional + from shaperglot.checker import Checker from shaperglot.languages import Languages +from shaperglot.reporter import Reporter -def find_lang(lang, langs): +def find_lang(lang: str, langs: Languages) -> Optional[str]: # Find the language in the languages list; could be by ID, by name, etc. if lang in langs: return lang @@ -15,12 +19,14 @@ def find_lang(lang, langs): or lang_info.get("autonym", "").lower() == lang.lower() ): return lang_id + return None def check(options) -> None: """Check a particular language or languages are supported""" checker = Checker(options.font) langs = Languages() + fixes_needed = defaultdict(set) for orig_lang in options.lang: lang = find_lang(orig_lang, langs) if not lang: @@ -31,6 +37,10 @@ def check(options) -> None: if results.is_unknown: print(f"Cannot determine whether font supports language '{lang}'") + elif results.is_nearly_success(options.nearly): + print(f"Font nearly supports language '{lang}'") + for fixtype, things in results.unique_fixes().items(): + fixes_needed[fixtype].update(things) elif results.is_success: print(f"Font supports language '{lang}'") else: @@ -42,3 +52,14 @@ def check(options) -> None: elif options.verbose or not results.is_success: for message in results.fails: print(f" * {message}") + + if fixes_needed: + show_how_to_fix(fixes_needed) + + +def show_how_to_fix(fixes: Reporter): + print("\nTo add full support to nearly-supported languages:") + for category, fixes in fixes.items(): + plural = "s" if len(fixes) > 1 else "" + print(f" * {category.replace("_", ' ').capitalize()}{plural}: ", end="") + print("; ".join(sorted(fixes))) diff --git a/Lib/shaperglot/cli/report.py b/Lib/shaperglot/cli/report.py index 22d8a0c..fdbb4fb 100644 --- a/Lib/shaperglot/cli/report.py +++ b/Lib/shaperglot/cli/report.py @@ -1,3 +1,4 @@ +from collections import defaultdict import os import re from textwrap import fill @@ -13,8 +14,10 @@ def report(options) -> None: checker = Checker(options.font) langs = Languages() messages = [] + nearly = [] supported = [] unsupported = [] + fixes_needed = defaultdict(set) if options.csv: print( @@ -36,18 +39,26 @@ def report(options) -> None: if results.is_success: supported.append(lang) msg = "supports" + elif results.is_nearly_success(options.nearly): + nearly.append(lang) + msg = "nearly supports" else: unsupported.append(lang) msg = "does not fully support" + + for fixtype, things in results.unique_fixes().items(): + fixes_needed[fixtype].update(things) if options.group: continue print(f"Font {msg} language '{lang}' ({langs[lang]['name']})") - messages.extend(results) if options.verbose and options.verbose > 1: for status, message in results: print(f" * {status.value}: {message}") + if options.csv: + return + if options.group: if supported: print("Supported languages") @@ -55,6 +66,12 @@ def report(options) -> None: for lang in supported: print(f"Font supports language '{lang}' ({langs[lang]['name']})") + if nearly: + print("\nNearly supported languages") + print("===================\n") + for lang in nearly: + print(f"Font nearly supports language '{lang}' ({langs[lang]['name']})") + if unsupported: print("\nUnsupported languages") print("====================\n") @@ -63,22 +80,21 @@ def report(options) -> None: f"Font does not fully support language '{lang}' ({langs[lang]['name']})" ) # Collate a useful fixing guide - if options.csv: - return - - short_summary(supported, unsupported) + short_summary(supported, nearly, unsupported) if options.verbose: - long_summary(messages, unsupported) + long_summary(fixes_needed, unsupported) -def short_summary(supported, unsupported) -> None: +def short_summary(supported, nearly, unsupported) -> None: print("\n== Summary ==\n") - print(f"* {len(supported)+len(unsupported)} languages checked") + print(f"* {len(supported)+len(nearly)+len(unsupported)} languages checked") if supported: print(f"* {len(supported)} languages supported") + if nearly: + print(f"* {len(supported)} languages nearly supported") -def long_summary(messages, unsupported) -> None: +def long_summary(fixes_needed, unsupported) -> None: if unsupported: print( fill( @@ -88,44 +104,11 @@ def long_summary(messages, unsupported) -> None: ) ) print("\nTo add support:") - missing_bases = set() - missing_marks = set() - missing_anchors = set() - for msg in messages: - if msg.result_code == "bases-missing": - missing_bases |= set(msg.context["glyphs"]) - if msg.result_code == "marks-missing": - missing_marks |= set(msg.context["glyphs"]) - if msg.result_code == "orphaned-mark": - missing_anchors.add((msg.context["base"], msg.context["mark"])) - if missing_marks: - print( - fill( - " * Add mark glyphs: " - + ", ".join(["\u25cc" + x for x in sorted(missing_marks)]), - subsequent_indent=" ", - width=os.get_terminal_size()[0] - 2, - ) - ) - if missing_bases: - print( - fill( - " * Support characters: " + ", ".join(sorted(missing_bases)), - subsequent_indent=" ", - width=os.get_terminal_size()[0] - 2, - ) - ) - if missing_anchors: - print( - fill( - " * Add anchor attachments: " - + ", ".join( - [base + '/' + mark for base, mark in sorted(missing_anchors)] - ), - subsequent_indent=" ", - width=os.get_terminal_size()[0] - 2, - ) - ) + for category, fixes in fixes_needed.items(): + plural = "s" if len(fixes) > 1 else "" + print(f" * {category.replace("_", ' ').capitalize()}{plural}: ") + for fix in sorted(fixes): + print(" - " + fix) def report_csv(langcode, lang, results: Iterable[Result]) -> None: diff --git a/Lib/shaperglot/reporter.py b/Lib/shaperglot/reporter.py index 89e55ca..4a9e4e8 100644 --- a/Lib/shaperglot/reporter.py +++ b/Lib/shaperglot/reporter.py @@ -1,6 +1,9 @@ +from collections import defaultdict from collections.abc import Sequence from dataclasses import dataclass, field from enum import Enum +import json +from typing import Dict from termcolor import colored @@ -19,32 +22,40 @@ class Message: message: str result_code: str = "ok" context: dict = field(default_factory=dict) + fixes: list = field(default_factory=list) def __repr__(self) -> str: return self.result.value + ": " + self.message + def __hash__(self) -> int: + hsh = hash(self.check_name + self.message + self.result.value) + return hsh + + def __eq__(self, other) -> bool: + return self.check_name == other.check_name and self.message == other.message + class Reporter(Sequence): def __init__(self) -> None: - self.results = [] + self.results = {} def __getitem__(self, index): - return self.results[index] + return list(self.results.keys())[index] def __len__(self) -> int: - return len(self.results) + return len(self.results.keys()) def okay(self, **kwargs) -> None: - self.results.append(Message(result=Result.PASS, **kwargs)) + self.results[Message(result=Result.PASS, **kwargs)] = 1 def warn(self, **kwargs) -> None: - self.results.append(Message(result=Result.WARN, **kwargs)) + self.results[Message(result=Result.WARN, **kwargs)] = 1 def fail(self, **kwargs) -> None: - self.results.append(Message(result=Result.FAIL, **kwargs)) + self.results[Message(result=Result.FAIL, **kwargs)] = 1 def skip(self, **kwargs) -> None: - self.results.append(Message(result=Result.SKIP, **kwargs)) + self.results[Message(result=Result.SKIP, **kwargs)] = 1 @property def is_unknown(self) -> bool: @@ -54,6 +65,9 @@ def is_unknown(self) -> bool: def is_success(self) -> bool: return len(self.passes) > 0 and not self.fails + def is_nearly_success(self, nearly: int) -> bool: + return not self.is_success and self.count_fixes <= nearly + @property def passes(self) -> list: return [x for x in self.results if x.result == Result.PASS] @@ -65,3 +79,19 @@ def fails(self) -> list: @property def warns(self) -> list: return [x for x in self.results if x.result == Result.WARN] + + def unique_fixes(self) -> Dict[str, list[str]]: + fixes = defaultdict(list) + for result in self.results: + for fix in result.fixes: + fixtype = fix["type"] + fixthing = fix["thing"] + if fixthing not in fixes[fixtype]: + fixes[fixtype].append(fixthing) + return fixes + + @property + def count_fixes(self) -> int: + """Return the number of fixes required to pass all checks""" + fixes = self.unique_fixes() + return sum([len(stuff) for stuff in fixes.values()])