diff --git a/Lib/ufo2ft/constants.py b/Lib/ufo2ft/constants.py index de033e5d8..0c8327fd4 100644 --- a/Lib/ufo2ft/constants.py +++ b/Lib/ufo2ft/constants.py @@ -38,6 +38,8 @@ UNICODE_VARIATION_SEQUENCES_KEY = "public.unicodeVariationSequences" +COMMON_SCRIPT = "Zyyy" + INDIC_SCRIPTS = [ "Beng", # Bengali "Cham", # Cham diff --git a/Lib/ufo2ft/featureCompiler.py b/Lib/ufo2ft/featureCompiler.py index 7b5b94601..b5e589b95 100644 --- a/Lib/ufo2ft/featureCompiler.py +++ b/Lib/ufo2ft/featureCompiler.py @@ -281,8 +281,14 @@ def setupFeatures(self): if self.featureWriters: featureFile = parseLayoutFeatures(self.ufo, self.feaIncludeDir) + path = self.ufo.path for writer in self.featureWriters: - writer.write(self.ufo, featureFile, compiler=self) + try: + writer.write(self.ufo, featureFile, compiler=self) + except FeatureLibError: + if path is None: + self._write_temporary_feature_file(featureFile.asFea()) + raise # stringify AST to get correct line numbers in error messages self.features = featureFile.asFea() @@ -316,15 +322,16 @@ def buildTables(self): addOpenTypeFeaturesFromString(self.ttFont, self.features, filename=path) except FeatureLibError: if path is None: - # if compilation fails, create temporary file for inspection - data = self.features.encode("utf-8") - with NamedTemporaryFile(delete=False) as tmp: - tmp.write(data) - logger.error( - "Compilation failed! Inspect temporary file: %r", tmp.name - ) + self._write_temporary_feature_file(self.features) raise + def _write_temporary_feature_file(self, features: str) -> None: + # if compilation fails, create temporary file for inspection + data = features.encode("utf-8") + with NamedTemporaryFile(delete=False) as tmp: + tmp.write(data) + logger.error("Compilation failed! 
Inspect temporary file: %r", tmp.name) + class MtiFeatureCompiler(BaseFeatureCompiler): """Compile OpenType layout tables from MTI feature files using diff --git a/Lib/ufo2ft/featureWriters/baseFeatureWriter.py b/Lib/ufo2ft/featureWriters/baseFeatureWriter.py index 3a1bfbbef..8e83d0a71 100644 --- a/Lib/ufo2ft/featureWriters/baseFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/baseFeatureWriter.py @@ -2,6 +2,8 @@ from collections import OrderedDict, namedtuple from types import SimpleNamespace +from fontTools import unicodedata + from ufo2ft.constants import OPENTYPE_CATEGORIES_KEY from ufo2ft.errors import InvalidFeaturesData from ufo2ft.featureWriters import ast @@ -122,8 +124,12 @@ def shouldContinue(self): def write(self, font, feaFile, compiler=None): """Write features and class definitions for this font to a feaLib FeatureFile object. - Returns True if feature file was modified, False if no new features - were generated. + + The main entry point for the FeatureCompiler to any of the + FeatureWriters. + + Returns True if feature file was modified, False if no new features were + generated. """ self.setContext(font, feaFile, compiler=compiler) try: @@ -386,3 +392,34 @@ def getGDEFGlyphClasses(self): frozenset(marks), frozenset(components), ) + + def guessFontScripts(self): + """Returns a set of scripts the font probably supports. + + This is done by: + + 1. Looking at all defined codepoints in a font and remembering the + script of any of the codepoints if it is associated with just one + script. This would remember the script of U+0780 THAANA LETTER HAA + (Thaa) but not U+061F ARABIC QUESTION MARK (multiple scripts). + 2. Adding explicitly declared `languagesystem` scripts on top. + """ + font = self.context.font + glyphSet = self.context.glyphSet + feaFile = self.context.feaFile + single_scripts = set() + + # First, detect scripts from the codepoints. 
+ for glyph in font: + if glyph.name not in glyphSet or glyph.unicodes is None: + continue + for codepoint in glyph.unicodes: + scripts = unicodedata.script_extension(chr(codepoint)) + if len(scripts) == 1: + single_scripts.update(scripts) + + # Then, add explicitly declared languagesystems on top. + feaScripts = ast.getScriptLanguageSystems(feaFile) + single_scripts.update(feaScripts.keys()) + + return single_scripts diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py index a071eff05..bc71ee422 100644 --- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py @@ -1,10 +1,19 @@ +from __future__ import annotations + +import itertools +import logging +from dataclasses import dataclass from types import SimpleNamespace +from typing import Iterator, Mapping from fontTools import unicodedata +from fontTools.unicodedata import script_horizontal_direction -from ufo2ft.constants import INDIC_SCRIPTS, USE_SCRIPTS +from ufo2ft.constants import COMMON_SCRIPT, INDIC_SCRIPTS, USE_SCRIPTS from ufo2ft.featureWriters import BaseFeatureWriter, ast -from ufo2ft.util import classifyGlyphs, quantize, unicodeScriptDirection +from ufo2ft.util import DFLT_SCRIPTS, classifyGlyphs, quantize + +LOGGER = logging.getLogger(__name__) SIDE1_PREFIX = "public.kern1." SIDE2_PREFIX = "public.kern2." 
@@ -20,6 +29,9 @@ RTL_BIDI_TYPES = {"R", "AL"} LTR_BIDI_TYPES = {"L", "AN", "EN"} +AMBIGUOUS_BIDIS = {"R", "L"} +COMMON_SCRIPTS_SET = {COMMON_SCRIPT} +COMMON_CLASS_NAME = "Default" def unicodeBidiType(uv): @@ -36,73 +48,142 @@ def unicodeBidiType(uv): return None +@dataclass(frozen=True, order=False) class KerningPair: + __slots__ = ("side1", "side2", "value") - __slots__ = ("side1", "side2", "value", "directions", "bidiTypes") - - def __init__(self, side1, side2, value, directions=None, bidiTypes=None): - if isinstance(side1, str): - self.side1 = ast.GlyphName(side1) - elif isinstance(side1, ast.GlyphClassDefinition): - self.side1 = ast.GlyphClassName(side1) - else: - raise AssertionError(side1) + side1: str | tuple[str, ...] + side2: str | tuple[str, ...] + value: float - if isinstance(side2, str): - self.side2 = ast.GlyphName(side2) - elif isinstance(side2, ast.GlyphClassDefinition): - self.side2 = ast.GlyphClassName(side2) - else: - raise AssertionError(side2) + def __lt__(self, other: KerningPair) -> bool: + if not isinstance(other, KerningPair): + return NotImplemented - self.value = value - self.directions = directions or set() - self.bidiTypes = bidiTypes or set() + # Sort Kerning pairs so that glyph to glyph comes first, then glyph to + # class, class to glyph, and finally class to class. This makes "kerning + # exceptions" work, where more specific glyph pair values override less + # specific class kerning. NOTE: Since comparisons terminate early, this + # is never going to compare a str to a tuple. 
+ selfTuple = (self.firstIsClass, self.secondIsClass, self.side1, self.side2) + otherTuple = (other.firstIsClass, other.secondIsClass, other.side1, other.side2) + return selfTuple < otherTuple @property - def firstIsClass(self): - return isinstance(self.side1, ast.GlyphClassName) + def firstIsClass(self) -> bool: + return isinstance(self.side1, tuple) @property - def secondIsClass(self): - return isinstance(self.side2, ast.GlyphClassName) + def secondIsClass(self) -> bool: + return isinstance(self.side2, tuple) @property - def glyphs(self): - if self.firstIsClass: - classDef1 = self.side1.glyphclass - glyphs1 = {g.asFea() for g in classDef1.glyphSet()} + def firstGlyphs(self) -> tuple[str, ...]: + if isinstance(self.side1, tuple): + return self.side1 else: - glyphs1 = {self.side1.asFea()} - if self.secondIsClass: - classDef2 = self.side2.glyphclass - glyphs2 = {g.asFea() for g in classDef2.glyphSet()} + return (self.side1,) + + @property + def secondGlyphs(self) -> tuple[str, ...]: + if isinstance(self.side2, tuple): + return self.side2 else: - glyphs2 = {self.side2.asFea()} - return glyphs1 | glyphs2 - - def __repr__(self): - return "<{} {} {} {}{}{}>".format( - self.__class__.__name__, - self.side1, - self.side2, - self.value, - " %r" % self.directions if self.directions else "", - " %r" % self.bidiTypes if self.bidiTypes else "", - ) + return (self.side2,) + + @property + def glyphs(self) -> tuple[str, ...]: + return (*self.firstGlyphs, *self.secondGlyphs) class KernFeatureWriter(BaseFeatureWriter): """Generates a kerning feature based on groups and rules contained in an UFO's kerning data. - There are currently two possible writing modes: - 2) "skip" (default) will not write anything if the features are already present; - 1) "append" will add additional lookups to an existing feature, if present, - or it will add a new one at the end of all features. 
- If the `quantization` argument is given in the filter options, the resulting anchors are rounded to the nearest multiple of the quantization value. + + ## Implementation Notes + + The algorithm works like this: + + * Parse GDEF GlyphClassDefinition from UFO features.fea to get the set of + "Mark" glyphs (this will be used later to decide whether to add + ignoreMarks flag to kern lookups containing pairs between base and mark + glyphs). + * Get the ordered glyphset for the font, for filtering kerning groups and + kernings that reference unknown glyphs. + * Determine which scripts the kerning affects (read: "the font most probably + supports"), to know which lookups to generate later: + * First, determine the unambiguous script associations for each + (Unicoded) glyph in the glyphset, as in, glyphs that have a single + entry for their Unicode script extensions property; + * then, parse the `languagesystem` statements in the provided feature + file to add on top. + * Compile a Unicode cmap from the UFO and a GSUB table from the features so + far, so we can determine: + * the script (extensions) for each glyph in the glyphset, including + glyphs reachable via substitution, using the fontTools subsetter with + its `closure_glyphs` machinery; the scripts are cut down to the ones + we think the font supports; + * and the bidirectionality class, so we can later filter out kerning + pairs that would mix RTL and LTR glyphs, which will not occur in + applications. Unicode BiDi classes L, AN and EN are considered L, R + and AL are considered R. + * Get the kerning groups from the UFO and filter out glyphs not in the + glyphset and empty groups. Remember which group a glyph is a member of, + for kern1 and kern2, so we can later reconstruct per-script groups. + * Get the bare kerning pairs from the UFO, filtering out pairs with unknown + groups or glyphs not in the glyphset and (redundant) zero class-to-class + kernings and optionally quantizing kerning values. 
+ * Start generating lookups. By default, the ignore marks flag is added to + each lookup. Kerning pairs that kern bases against marks or marks against + marks, according to the glyphs' GDEF category, then get split off into a + second lookup without the ignore marks flag. + * Go through all kerning pairs and split them up by script, to put them in + different lookups. This reduces the size of each lookup compared to + splitting by direction, as previously done. + * Partition the first and second side of a pair by script and emit only + those with the same script (e.g. `a` and `b` are both "Latn", `period` + and `period` are both "Default", but `a` and `a-cy` would mix "Latn" + and "Cyrl" and are dropped) or those that kern an explicit against a + "common" or "inherited" script, e.g. `a` and `period`. + * Glyphs can have multiple scripts assigned to them (legitimately, e.g. + U+0951 DEVANAGARI STRESS SIGN UDATTA, or for random reasons like + having both `sub h by h.sc` and `sub Etaprosgegrammeni by h.sc;`). + Only scripts that were determined earlier to be supported by the font + will be considered. Usually, we will emit pairs where both sides have + the same script and no splitting is necessary. The only mixed script + pairs we emit are common or inherited (Zyyy or Zinh) against explicit + (e.g. Latn) scripts. A glyph can be part of both for weird reasons, so + we always treat any glyph with a common or inherited script as a + purely common (not inherited) glyph for bucketing purposes. This + avoids creating overlapping groups with the multi-script glyph in a + lookup. + * Some glyphs may have a script of Zyyy or Zinh but have a disjoint set + of explicit scripts as their script extension. By looking only at the + script extension, we treat many of them as being part of an explicit + script rather than as a common or inherited glyph. 
+ * Preserve the type of the kerning pair, so class-to-class kerning stays + that way, even when there's only one glyph on each side. + * Reconstruct kerning group names for the newly split classes. This is done + for debuggability; it makes no difference for the final font binary. + * This first looks at the common lookups and then all others, assigning + new group names as it goes. A class like `@kern1.A = [A A-cy + increment]` may be split up into `@kern1.Latn.A = [A]`, `@kern1.Cyrl.A + = [A-cy]` and `@kern1.Default.A = [increment]`. Note: If there is no + dedicated Default lookup, common glyph classes like `[period]` might + carry the name `@kern1.Grek.foo` if the class was first encountered + while going over the Grek lookup. + * Discard pairs that mix RTL and LTR BiDi types, because they won't show up + in applications due to how Unicode text is split into runs. + * Discard empty lookups, if they were created but all their pairs were + discarded. + * Make a `kern` (and potentially `dist`) feature block and register the + lookups for each script. Some scripts need to be registered in the `dist` + feature for some shapers to discover them, e.g. Yezi. + * Write the new glyph class definitions and then the lookups and feature + blocks to the feature file. """ tableTag = "GPOS" @@ -112,10 +193,25 @@ class KernFeatureWriter(BaseFeatureWriter): def setContext(self, font, feaFile, compiler=None): ctx = super().setContext(font, feaFile, compiler=compiler) ctx.gdefClasses = self.getGDEFGlyphClasses() - ctx.kerning = self.getKerningData(font, feaFile, self.getOrderedGlyphSet()) + ctx.glyphSet = self.getOrderedGlyphSet() + + # TODO: Also include substitution information from Designspace rules to + # correctly set the scripts of variable substitution glyphs, maybe add + # `glyphUnicodeMapping: dict[str, int] | None` to `BaseFeatureCompiler`?
+ cmap = self.makeUnicodeToGlyphNameMapping() + gsub = self.compileGSUB() + ctx.knownScripts = self.guessFontScripts() + scriptGlyphs = classifyGlyphs(self.knownScriptsPerCodepoint, cmap, gsub) + bidiGlyphs = classifyGlyphs(unicodeBidiType, cmap, gsub) + ctx.bidiGlyphs = bidiGlyphs - feaScripts = ast.getScriptLanguageSystems(feaFile) - ctx.scriptGroups = self._groupScriptsByTagAndDirection(feaScripts) + glyphScripts = {} + for script, glyphs in scriptGlyphs.items(): + for g in glyphs: + glyphScripts.setdefault(g, set()).add(script) + ctx.glyphScripts = glyphScripts + + ctx.kerning = self.getKerningData() return ctx @@ -124,13 +220,6 @@ def shouldContinue(self): self.log.debug("No kerning data; skipped") return False - if "dist" in self.context.todo and "dist" not in self.context.scriptGroups: - self.log.debug( - "No dist-enabled scripts defined in languagesystem " - "statements; dist feature will not be generated" - ) - self.context.todo.remove("dist") - return super().shouldContinue() def _write(self): @@ -148,16 +237,14 @@ def _write(self): feaFile = self.context.feaFile # first add the glyph class definitions - side1Classes = self.context.kerning.side1Classes - side2Classes = self.context.kerning.side2Classes - newClassDefs = [] - for classes in (side1Classes, side2Classes): - newClassDefs.extend([c for _, c in sorted(classes.items())]) + classDefs = self.context.kerning.classDefs + newClassDefs = [c for _, c in sorted(classDefs.items())] lookupGroups = [] for _, lookupGroup in sorted(lookups.items()): - lookupGroups.extend(lookupGroup) + lookupGroups.extend(lookupGroup.values()) + # NOTE: We don't write classDefs because we literalise all classes. 
self._insert( feaFile=feaFile, classDefs=newClassDefs, @@ -166,223 +253,130 @@ def _write(self): ) return True - @classmethod - def getKerningData(cls, font, feaFile=None, glyphSet=None): - side1Classes, side2Classes = cls.getKerningClasses(font, feaFile, glyphSet) - pairs = cls.getKerningPairs(font, side1Classes, side2Classes, glyphSet) + def getKerningData(self): + side1Groups, side2Groups = self.getKerningGroups() + pairs = self.getKerningPairs(side1Groups, side2Groups) + # side(1|2)Classes and classDefs will hold the feaLib AST to write out. return SimpleNamespace( - side1Classes=side1Classes, side2Classes=side2Classes, pairs=pairs + side1Classes={}, side2Classes={}, classDefs={}, pairs=pairs ) - @staticmethod - def getKerningGroups(font, glyphSet=None): - if glyphSet: - allGlyphs = set(glyphSet.keys()) - else: - allGlyphs = set(font.keys()) + def getKerningGroups(self): + font = self.context.font + allGlyphs = self.context.glyphSet side1Groups = {} side2Groups = {} + side1Membership = {} + side2Membership = {} for name, members in font.groups.items(): # prune non-existent or skipped glyphs - members = [g for g in members if g in allGlyphs] + members = {g for g in members if g in allGlyphs} if not members: # skip empty groups continue # skip groups without UFO3 public.kern{1,2} prefix if name.startswith(SIDE1_PREFIX): - side1Groups[name] = members + side1Groups[name] = tuple(sorted(members)) + name_truncated = name[len(SIDE1_PREFIX) :] + for member in members: + side1Membership[member] = name_truncated elif name.startswith(SIDE2_PREFIX): - side2Groups[name] = members + side2Groups[name] = tuple(sorted(members)) + name_truncated = name[len(SIDE2_PREFIX) :] + for member in members: + side2Membership[member] = name_truncated + self.context.side1Membership = side1Membership + self.context.side2Membership = side2Membership return side1Groups, side2Groups - @classmethod - def getKerningClasses(cls, font, feaFile=None, glyphSet=None): - side1Groups, side2Groups = 
cls.getKerningGroups(font, glyphSet) - side1Classes = ast.makeGlyphClassDefinitions( - side1Groups, feaFile, stripPrefix="public." - ) - side2Classes = ast.makeGlyphClassDefinitions( - side2Groups, feaFile, stripPrefix="public." - ) - return side1Classes, side2Classes - - @staticmethod - def getKerningPairs(font, side1Classes, side2Classes, glyphSet=None): - if glyphSet: - allGlyphs = set(glyphSet.keys()) - else: - allGlyphs = set(font.keys()) + def getKerningPairs(self, side1Classes, side2Classes): + glyphSet = self.context.glyphSet + font = self.context.font kerning = font.kerning + quantization = self.options.quantization - pairsByFlags = {} - for (side1, side2) in kerning: - # filter out pairs that reference missing groups or glyphs - if side1 not in side1Classes and side1 not in allGlyphs: + kerning = font.kerning + result = [] + for (side1, side2), value in kerning.items(): + firstIsClass, secondIsClass = (side1 in side1Classes, side2 in side2Classes) + # Filter out pairs that reference missing groups or glyphs. + if not firstIsClass and side1 not in glyphSet: + continue + if not secondIsClass and side2 not in glyphSet: continue - if side2 not in side2Classes and side2 not in allGlyphs: + # Ignore zero-valued class kern pairs. They are the most general + # kerns, so they don't override anything else like glyph kerns would + # and zero is the default. 
+ if firstIsClass and secondIsClass and value == 0: continue - flags = (side1 in side1Classes, side2 in side2Classes) - pairsByFlags.setdefault(flags, set()).add((side1, side2)) + if firstIsClass: + side1 = side1Classes[side1] + if secondIsClass: + side2 = side2Classes[side2] + value = quantize(value, quantization) + result.append(KerningPair(side1, side2, value)) - result = [] - for flags, pairs in sorted(pairsByFlags.items()): - for side1, side2 in sorted(pairs): - value = kerning[side1, side2] - if all(flags) and value == 0: - # ignore zero-valued class kern pairs - continue - firstIsClass, secondIsClass = flags - if firstIsClass: - side1 = side1Classes[side1] - if secondIsClass: - side2 = side2Classes[side2] - result.append(KerningPair(side1, side2, value)) return result - def _intersectPairs(self, attribute, glyphSets): - allKeys = set() - for pair in self.context.kerning.pairs: - for key, glyphs in glyphSets.items(): - if not pair.glyphs.isdisjoint(glyphs): - getattr(pair, attribute).add(key) - allKeys.add(key) - return allKeys - - @staticmethod - def _groupScriptsByTagAndDirection(feaScripts): - # Read scripts/languages defined in feaFile's 'languagesystem' - # statements and group them by the feature tag (kern or dist) - # they are associated with, and the global script's horizontal - # direction (DFLT is excluded) - scriptGroups = {} - for scriptCode, scriptLangSys in feaScripts.items(): - if scriptCode: - direction = unicodedata.script_horizontal_direction(scriptCode) - else: - direction = "LTR" - if scriptCode in DIST_ENABLED_SCRIPTS: - tag = "dist" - else: - tag = "kern" - scriptGroups.setdefault(tag, {}).setdefault(direction, []).extend( - scriptLangSys - ) - return scriptGroups - - @staticmethod - def _makePairPosRule(pair, rtl=False, quantization=1): + def _makePairPosRule(self, pair, side1Classes, side2Classes, rtl=False): enumerated = pair.firstIsClass ^ pair.secondIsClass - value = quantize(pair.value, quantization) - if rtl and "L" in 
pair.bidiTypes: - # numbers are always shaped LTR even in RTL scripts - rtl = False valuerecord = ast.ValueRecord( - xPlacement=value if rtl else None, + xPlacement=pair.value if rtl else None, yPlacement=0 if rtl else None, - xAdvance=value, + xAdvance=pair.value, yAdvance=0 if rtl else None, ) + + if pair.firstIsClass: + glyphs1 = ast.GlyphClassName(side1Classes[pair.side1]) + else: + glyphs1 = ast.GlyphName(pair.side1) + if pair.secondIsClass: + glyphs2 = ast.GlyphClassName(side2Classes[pair.side2]) + else: + glyphs2 = ast.GlyphName(pair.side2) + return ast.PairPosStatement( - glyphs1=pair.side1, + glyphs1=glyphs1, valuerecord1=valuerecord, - glyphs2=pair.side2, + glyphs2=glyphs2, valuerecord2=None, enumerated=enumerated, ) - def _makeKerningLookup( - self, name, pairs, exclude=None, rtl=False, ignoreMarks=True - ): - assert pairs - rules = [] - for pair in pairs: - if exclude is not None and exclude(pair): - self.log.debug("pair excluded from '%s' lookup: %r", name, pair) - continue - rules.append( - self._makePairPosRule( - pair, rtl=rtl, quantization=self.options.quantization - ) - ) - - if rules: - lookup = ast.LookupBlock(name) - if ignoreMarks and self.options.ignoreMarks: - lookup.statements.append(ast.makeLookupFlag("IgnoreMarks")) - lookup.statements.extend(rules) - return lookup - - def _makeKerningLookups(self): - cmap = self.makeUnicodeToGlyphNameMapping() - if any(unicodeScriptDirection(uv) == "RTL" for uv in cmap): - # If there are any characters from globally RTL scripts in the - # cmap, we compile a temporary GSUB table to resolve substitutions - # and group glyphs by script horizontal direction and bidirectional - # type. We then mark each kerning pair with these properties when - # any of the glyphs involved in a pair intersects these groups. 
- gsub = self.compileGSUB() - dirGlyphs = classifyGlyphs(unicodeScriptDirection, cmap, gsub) - directions = self._intersectPairs("directions", dirGlyphs) - shouldSplit = "RTL" in directions - if shouldSplit: - bidiGlyphs = classifyGlyphs(unicodeBidiType, cmap, gsub) - self._intersectPairs("bidiTypes", bidiGlyphs) + def _makeKerningLookup(self, name, ignoreMarks=True): + lookup = ast.LookupBlock(name) + if ignoreMarks and self.options.ignoreMarks: + lookup.statements.append(ast.makeLookupFlag("IgnoreMarks")) + return lookup + + def knownScriptsPerCodepoint(self, uv: int) -> set[str]: + if not self.context.knownScripts: + # If there are no languagesystems and nothing to derive from Unicode + # codepoints, consider everything common; it'll all end in DFLT/dflt + # anyway. + return {COMMON_SCRIPT} else: - shouldSplit = False + script_extension = unicodedata.script_extension(chr(uv)) + return script_extension & (self.context.knownScripts | DFLT_SCRIPTS) + def _makeKerningLookups(self): marks = self.context.gdefClasses.mark lookups = {} - if shouldSplit: - # make one DFLT lookup with script-agnostic characters, and two - # LTR/RTL lookups excluding pairs from the opposite group. - # We drop kerning pairs with ambiguous direction: i.e. those containing - # glyphs from scripts with different overall horizontal direction, or - # glyphs with incompatible bidirectional type (e.g. arabic letters vs - # arabic numerals). 
- pairs = [] - for pair in self.context.kerning.pairs: - if ("RTL" in pair.directions and "LTR" in pair.directions) or ( - "R" in pair.bidiTypes and "L" in pair.bidiTypes - ): - self.log.warning( - "skipped kern pair with ambiguous direction: %r", pair - ) - continue - pairs.append(pair) - if not pairs: - return lookups - - if self.options.ignoreMarks: - # If there are pairs with a mix of mark/base then the IgnoreMarks - # flag is unnecessary and should not be set - basePairs, markPairs = self._splitBaseAndMarkPairs(pairs, marks) - if basePairs: - self._makeSplitDirectionKernLookups(lookups, basePairs) - if markPairs: - self._makeSplitDirectionKernLookups( - lookups, markPairs, ignoreMarks=False, suffix="_marks" - ) - else: - self._makeSplitDirectionKernLookups(lookups, pairs) + pairs = self.context.kerning.pairs + + if self.options.ignoreMarks: + basePairs, markPairs = self._splitBaseAndMarkPairs( + self.context.kerning.pairs, marks + ) + if basePairs: + self._makeSplitScriptKernLookups(lookups, basePairs) + if markPairs: + self._makeSplitScriptKernLookups( + lookups, markPairs, ignoreMarks=False, suffix="_marks" + ) else: - # only make a single (implicitly LTR) lookup including all base/base pairs - # and a single lookup including all base/mark pairs (if any) - pairs = self.context.kerning.pairs - if self.options.ignoreMarks: - basePairs, markPairs = self._splitBaseAndMarkPairs(pairs, marks) - lookups["LTR"] = [] - if basePairs: - lookups["LTR"].append( - self._makeKerningLookup("kern_ltr", basePairs) - ) - if markPairs: - lookups["LTR"].append( - self._makeKerningLookup( - "kern_ltr_marks", markPairs, ignoreMarks=False - ) - ) - else: - lookups["LTR"] = [self._makeKerningLookup("kern_ltr", pairs)] + self._makeSplitScriptKernLookups(lookups, pairs) return lookups def _splitBaseAndMarkPairs(self, pairs, marks): @@ -397,111 +391,282 @@ def _splitBaseAndMarkPairs(self, pairs, marks): basePairs[:] = pairs return basePairs, markPairs - def 
_makeSplitDirectionKernLookups( - self, lookups, pairs, ignoreMarks=True, suffix="" - ): - dfltKern = self._makeKerningLookup( - "kern_dflt" + suffix, - pairs, - exclude=(lambda pair: {"LTR", "RTL"}.intersection(pair.directions)), - rtl=False, - ignoreMarks=ignoreMarks, - ) - if dfltKern: - lookups.setdefault("DFLT", []).append(dfltKern) - - ltrKern = self._makeKerningLookup( - "kern_ltr" + suffix, - pairs, - exclude=(lambda pair: not pair.directions or "RTL" in pair.directions), - rtl=False, - ignoreMarks=ignoreMarks, - ) - if ltrKern: - lookups.setdefault("LTR", []).append(ltrKern) - - rtlKern = self._makeKerningLookup( - "kern_rtl" + suffix, - pairs, - exclude=(lambda pair: not pair.directions or "LTR" in pair.directions), - rtl=True, - ignoreMarks=ignoreMarks, + def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix=""): + bidiGlyphs = self.context.bidiGlyphs + glyphScripts = self.context.glyphScripts + kerningPerScript = splitKerning(pairs, glyphScripts) + + classDefs, side1Classes, side2Classes = makeAllGlyphClassDefinitions( + kerningPerScript, self.context, self.context.feaFile ) - if rtlKern: - lookups.setdefault("RTL", []).append(rtlKern) + assert not classDefs.keys() & self.context.kerning.classDefs.keys() + self.context.kerning.classDefs.update(classDefs) + + for script, pairs in kerningPerScript.items(): + scriptLookups = lookups.setdefault(script, {}) + + key = f"kern_{script}{suffix}" + lookup = scriptLookups.get(key) + if not lookup: + # For neatness: + lookup = self._makeKerningLookup( + key.replace(COMMON_SCRIPT, COMMON_CLASS_NAME), + ignoreMarks=ignoreMarks, + ) + scriptLookups[key] = lookup + for pair in pairs: + bidiTypes = { + direction + for direction, glyphs in bidiGlyphs.items() + if not set(pair.glyphs).isdisjoint(glyphs) + } + if bidiTypes.issuperset(AMBIGUOUS_BIDIS): + LOGGER.info( + "Skipping kerning pair <%s %s %s> with ambiguous direction", + pair.side1, + pair.side2, + pair.value, + ) + continue + scriptIsRtl 
    def _makeFeatureBlocks(self, lookups):
        """Build the ``kern`` and ``dist`` feature blocks from per-script lookups.

        Only features present in ``self.context.todo`` are built, and a block
        is kept only if registering lookups actually produced statements.
        Returns a dict mapping feature tag ("kern"/"dist") to its FeatureBlock.
        """
        features = {}
        if "kern" in self.context.todo:
            kern = ast.FeatureBlock("kern")
            self._registerLookups(kern, lookups)
            if kern.statements:
                features["kern"] = kern
        if "dist" in self.context.todo:
            dist = ast.FeatureBlock("dist")
            self._registerLookups(dist, lookups)
            if dist.statements:
                features["dist"] = dist
        return features

    @staticmethod
    def _registerLookups(
        feature: ast.FeatureBlock, lookups: dict[str, dict[str, ast.LookupBlock]]
    ) -> None:
        """Register lookup references in *feature* for each relevant script.

        ``lookups`` maps Unicode script code -> {lookup name -> LookupBlock}.
        For ``kern``, common-script (Zyyy) lookups are registered under
        DFLT/dflt first; for ``dist``, only dist-enabled scripts are used.
        """
        # Ensure we have kerning for pure common script runs (e.g. ">1")
        isKernBlock = feature.name == "kern"
        if isKernBlock and COMMON_SCRIPT in lookups:
            ast.addLookupReferences(
                feature, lookups[COMMON_SCRIPT].values(), "DFLT", ["dflt"]
            )

        # Feature blocks use script tags to distinguish what to run for a
        # Unicode script.
        #
        # "Script tags generally correspond to a Unicode script. However, the
        # associations between them may not always be one-to-one, and the
        # OpenType script tags are not guaranteed to be the same as Unicode
        # Script property-value aliases or ISO 15924 script IDs."
        #
        # E.g. {"latn": "Latn", "telu": "Telu", "tel2": "Telu"}
        #
        # Skip DFLT script because we always take care of it above for `kern`.
        # It never occurs in `dist`.
        if isKernBlock:
            # kern: everything except the dist-enabled (Indic/USE) scripts.
            scriptsToReference = lookups.keys() - DIST_ENABLED_SCRIPTS
        else:
            # dist: only the dist-enabled scripts we actually have lookups for.
            scriptsToReference = DIST_ENABLED_SCRIPTS.intersection(lookups.keys())
        for script in sorted(scriptsToReference - DFLT_SCRIPTS):
            # A Unicode script may map to several OpenType tags (e.g. Telu ->
            # telu and tel2); register the same lookups under each tag.
            for tag in unicodedata.ot_tags_from_script(script):
                # Insert line breaks between statements for niceness :).
                if feature.statements:
                    feature.statements.append(ast.Comment(""))
                # We have something for this script. First add the default
                # lookups, then the script-specific ones
                lookupsForThisScript = []
                for dfltScript in DFLT_SCRIPTS:
                    if dfltScript in lookups:
                        lookupsForThisScript.extend(lookups[dfltScript].values())
                lookupsForThisScript.extend(lookups[script].values())
                # NOTE: We always use the `dflt` language because there is no
                # language-specific kerning to be derived from UFO (kerning.plist)
                # sources and we are independent of what's going on in the rest of
                # the features.fea file.
                ast.addLookupReferences(feature, lookupsForThisScript, tag, ["dflt"])


def splitKerning(pairs, glyphScripts):
    """Split *pairs* into per-script buckets keyed by Unicode script code.

    Each KerningPair is partitioned via partitionByScript, so a mixed-script
    pair may contribute (split) pairs to several buckets. Pairs within each
    bucket are sorted for deterministic output.
    """
    # Split kerning into per-script buckets, so we can post-process them before
    # continuing.
    kerningPerScript = {}
    for pair in pairs:
        for script, splitPair in partitionByScript(pair, glyphScripts):
            kerningPerScript.setdefault(script, []).append(splitPair)

    # Deliberate in-place sort of each bucket (shadows the parameter name,
    # which is no longer needed at this point).
    for pairs in kerningPerScript.values():
        pairs.sort()

    return kerningPerScript


def partitionByScript(
    pair: KerningPair,
    glyphScripts: Mapping[str, set[str]],
) -> Iterator[tuple[str, KerningPair]]:
    """Split a potentially mixed-script pair into pairs that make sense based
    on the dominant script, and yield each combination with its dominant script."""

    # Bucket each side's glyphs by script; a glyph with no known script is
    # treated as common/default (DFLT_SCRIPTS fallback).
    side1Scripts: dict[str, set[str]] = {}
    side2Scripts: dict[str, set[str]] = {}
    for glyph in pair.firstGlyphs:
        scripts = glyphScripts.get(glyph, DFLT_SCRIPTS)
        # If a glyph is both common or inherited *and* another script, treat it
        # as just common (throwing Zyyy and Zinh into the same bucket for
        # simplicity). This ensures that a pair appears to the shaper exactly
        # once, as long as every script sees at most 2 lookups (or 3 with mark
        # lookups, but they contain distinct pairs), the common one and the
        # script-specific one.
        if scripts & DFLT_SCRIPTS:
            scripts = COMMON_SCRIPTS_SET
        for script in scripts:
            side1Scripts.setdefault(script, set()).add(glyph)
    for glyph in pair.secondGlyphs:
        scripts = glyphScripts.get(glyph, DFLT_SCRIPTS)
        if scripts & DFLT_SCRIPTS:
            scripts = COMMON_SCRIPTS_SET
        for script in scripts:
            side2Scripts.setdefault(script, set()).add(glyph)

    # Consider every (side1 script, side2 script) combination.
    for firstScript, secondScript in itertools.product(side1Scripts, side2Scripts):
        # Preserve the type (glyph or class) of each side.
        # NOTE(review): localGlyphs is accumulated but not used below in this
        # view — presumably consumed by code outside this chunk; confirm.
        localGlyphs: set[str] = set()
        localSide1: str | tuple[str, ...]
        localSide2: str | tuple[str, ...]
        if pair.firstIsClass:
            localSide1 = tuple(sorted(side1Scripts[firstScript]))
            localGlyphs.update(localSide1)
        else:
            # A non-class side holds exactly one glyph by construction.
            assert len(side1Scripts[firstScript]) == 1
            (localSide1,) = side1Scripts[firstScript]
            localGlyphs.add(localSide1)
        if pair.secondIsClass:
            localSide2 = tuple(sorted(side2Scripts[secondScript]))
            localGlyphs.update(localSide2)
        else:
            assert len(side2Scripts[secondScript]) == 1
            (localSide2,) = side2Scripts[secondScript]
            localGlyphs.add(localSide2)

        # Common pairs inherit the explicit script of the other side, if any.
        if firstScript == secondScript or secondScript == COMMON_SCRIPT:
            localScript = firstScript
        elif firstScript == COMMON_SCRIPT:
            localScript = secondScript
        # Two different explicit scripts:
        else:
            LOGGER.info(
                "Skipping kerning pair <%s %s %s> with mixed script (%s, %s)",
                pair.side1,
                pair.side2,
                pair.value,
                firstScript,
                secondScript,
            )
            continue

        yield localScript, KerningPair(
            localSide1,
            localSide2,
            pair.value,
        )


def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None):
    """Create feaLib glyph class definitions for every kern1/kern2 group used.

    Common-script (Zyyy) classes are named first so that shared groups get the
    common class name before script-specific buckets are processed. Existing
    class names in *feaFile* (if given) are avoided when generating names.
    Returns (classDefs, side1Classes, side2Classes).
    """
    side1Classes = {}
    side2Classes = {}
    side1Membership = context.side1Membership
    side2Membership = context.side2Membership

    classDefs = {}
    if feaFile is not None:
        classNames = {cdef.name for cdef in ast.iterClassDefinitions(feaFile)}
    else:
        classNames = set()

    # Generate common class names first so that common classes are correctly
    # named in other lookups.
    if COMMON_SCRIPT in kerningPerScript:
        common_pairs = kerningPerScript[COMMON_SCRIPT]
        for pair in common_pairs:
            if pair.firstIsClass and pair.side1 not in side1Classes:
                addClassDefinition(
                    "kern1",
                    pair.side1,
                    side1Classes,
                    side1Membership,
                    classDefs,
                    classNames,
                    COMMON_CLASS_NAME,
                )
            if pair.secondIsClass and pair.side2 not in side2Classes:
                addClassDefinition(
                    "kern2",
                    pair.side2,
                    side2Classes,
                    side2Membership,
                    classDefs,
                    classNames,
                    COMMON_CLASS_NAME,
                )

    # Then the script-specific classes, in sorted script order for determinism.
    sortedKerningPerScript = sorted(kerningPerScript.items())
    for script, pairs in sortedKerningPerScript:
        if script == COMMON_SCRIPT:
            continue
        for pair in pairs:
            if pair.firstIsClass and pair.side1 not in side1Classes:
                addClassDefinition(
                    "kern1",
                    pair.side1,
                    side1Classes,
                    side1Membership,
                    classDefs,
                    classNames,
                    script,
                )
            if pair.secondIsClass and pair.side2 not in side2Classes:
                addClassDefinition(
                    "kern2",
                    pair.side2,
                    side2Classes,
                    side2Membership,
                    classDefs,
                    classNames,
                    script,
                )

    return classDefs, side1Classes, side2Classes


def addClassDefinition(
    prefix, group, classes, originalMembership, classDefs, classNames, script
):
    """Create one glyph class definition for *group* and record it.

    The class is named "{prefix}.{script}.{original UFO group name}" (made
    fea-safe and unique against *classNames*), then stored in both *classes*
    (keyed by the glyph tuple) and *classDefs* (keyed by class name).
    """
    # Any member glyph suffices to recover the original UFO group name.
    firstGlyph = next(iter(group))
    originalGroupName = originalMembership[firstGlyph]
    groupName = f"{prefix}.{script}.{originalGroupName}"
    className = ast.makeFeaClassName(groupName, classNames)
    classNames.add(className)
    classDef = ast.makeGlyphClassDefinition(className, group)
    classes[group] = classDefs[className] = classDef
- 'cmap' is a dictionary mapping Unicode codepoints to glyph names. - 'gsub' is an (optional) fonttools GSUB table object, used to find all - the glyphs that are "reachable" via substitutions from the initial - sets of glyphs defined in the cmap. + returns a string, or collection of strings, denoting some Unicode + property associated with the given character (or None if a character + is considered 'neutral'). 'cmap' is a dictionary mapping Unicode + codepoints to glyph names. 'gsub' is an (optional) fonttools GSUB + table object, used to find all the glyphs that are "reachable" via + substitutions from the initial sets of glyphs defined in the cmap. Returns a dictionary of glyph sets associated with the given Unicode properties. @@ -297,7 +297,7 @@ def classifyGlyphs(unicodeFunc, cmap, gsub=None): key_or_keys = unicodeFunc(uv) if key_or_keys is None: neutralGlyphs.add(glyphName) - elif isinstance(key_or_keys, (list, set)): + elif isinstance(key_or_keys, (list, set, tuple)): for key in key_or_keys: glyphSets.setdefault(key, set()).add(glyphName) else: diff --git a/tests/data/TestFont-CFF-compreffor.ttx b/tests/data/TestFont-CFF-compreffor.ttx index 9877c3e17..49e8a21de 100644 --- a/tests/data/TestFont-CFF-compreffor.ttx +++ b/tests/data/TestFont-CFF-compreffor.ttx @@ -392,7 +392,7 @@ - +