From c2b480260db6c96237bc8dc30849676d812b8c23 Mon Sep 17 00:00:00 2001 From: Khaled Hosny Date: Sun, 21 Jan 2024 19:19:57 +0200 Subject: [PATCH] Allow cross-script kerning Several implementations can apply kerning cross-script, so instead of splitting cross-script kerning pairs, we now keep all kerning for scripts which have common kerning pairs in one lookup and split the rest. --- .../featureWriters/kernFeatureWriter.py | 254 ++++++++++-------- 1 file changed, 139 insertions(+), 115 deletions(-) diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py index 0713bb8f..bdbb6c23 100644 --- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py @@ -157,24 +157,26 @@ class KernFeatureWriter(BaseFeatureWriter): second lookup without the ignore marks flag. * Go through all kerning pairs and split them up by script, to put them in different lookups. This reduces the size of each lookup compared to - splitting by direction, as previously done. + splitting by direction, as previously done. If there are kerning pairs + with different scripts on each side, theese scripts are all kept together + to allow for cross-script kerning (in implmenetations that apply it). + Scripts with different direction are always split. * Partition the first and second side of a pair by script and emit only those with the same script (e.g. `a` and `b` are both "Latn", `period` and `period` are both "Default", but `a` and `a-cy` would mix "Latn" - and "Cyrl" and are dropped) or those that kern an explicit against a - "common" or "inherited" script, e.g. `a` and `period`. + and "Cyrl" and are dropped), or those with kerning across them, or + those that kern an explicit against a "common" or "inherited" script + (e.g. `a` and `period`). * Glyphs can have multiple scripts assigned to them (legitimately, e.g. U+0951 DEVANAGARI STRESS SIGN UDATTA, or for random reasons like having both `sub h by h.sc` and `sub Etaprosgegrammeni by h.sc;`). Only scripts that were determined earlier to be supported by the font will be considered. Usually, we will emit pairs where both sides have - the same script and no splitting is necessary. The only mixed script - pairs we emit are common or inherited (Zyyy or Zinh) against explicit - (e.g. Latn) scripts. A glyph can be part of both for weird reasons, so - we always treat any glyph with a common or inherited script as a - purely common (not inherited) glyph for bucketing purposes. This - avoids creating overlapping groups with the multi-script glyph in a - lookup. + the same script and no splitting is necessary. A glyph can be part of + both for weird reasons, so we always treat any glyph with a common or + inherited script as a purely common (not inherited) glyph for + bucketing purposes. This avoids creating overlapping groups with the + multi-script glyph in a lookup. * Some glyphs may have a script of Zyyy or Zinh but have a disjoint set of explicit scripts as their script extension. By looking only at the script extension, we treat many of them as being part of an explicit @@ -291,7 +293,9 @@ def _write(self): lookupGroups = [] for _, lookupGroup in sorted(lookups.items()): - lookupGroups.extend(lookupGroup.values()) + lookupGroups.extend( + lkp for lkp in lookupGroup.values() if lkp not in lookupGroups + ) # NOTE: We don't write classDefs because we literalise all classes. self._insert( @@ -700,18 +704,11 @@ def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix=" assert not side2Classes.keys() & newSide2Classes.keys() side2Classes.update(newSide2Classes) - for script, pairs in kerningPerScript.items(): - scriptLookups = lookups.setdefault(script, {}) - - key = f"kern_{script}{suffix}" - lookup = scriptLookups.get(key) - if not lookup: - # For neatness: - lookup = self._makeKerningLookup( - key.replace(COMMON_SCRIPT, COMMON_CLASS_NAME), - ignoreMarks=ignoreMarks, - ) - scriptLookups[key] = lookup + for scripts, pairs in kerningPerScript.items(): + lookupName = f"kern_{'_'.join(scripts)}{suffix}".replace( + COMMON_SCRIPT, COMMON_CLASS_NAME + ) + lookup = self._makeKerningLookup(lookupName, ignoreMarks=ignoreMarks) for pair in pairs: bidiTypes = { direction @@ -726,13 +723,19 @@ def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix=" pair.value, ) continue - scriptIsRtl = script_horizontal_direction(script, "LTR") == "RTL" + directions = { + script_horizontal_direction(script, "LTR") for script in scripts + } + assert len(directions) == 1 + scriptIsRtl = directions == {"RTL"} # Numbers are always shaped LTR even in RTL scripts: pairIsRtl = scriptIsRtl and "L" not in bidiTypes rule = self._makePairPosRule( pair, side1Classes, side2Classes, pairIsRtl ) lookup.statements.append(rule) + for script in scripts: + lookups.setdefault(script, {})[lookupName] = lookup # Clean out empty lookups. for script, scriptLookups in list(lookups.items()): @@ -771,7 +774,9 @@ def _registerLookups( isKernBlock = feature.name == "kern" dfltLookups: list[ast.LookupBlock] = [] if isKernBlock and COMMON_SCRIPT in lookups: - dfltLookups.extend(lookups[COMMON_SCRIPT].values()) + dfltLookups.extend( + lkp for lkp in lookups[COMMON_SCRIPT].values() if lkp not in dfltLookups + ) # InDesign bugfix: register kerning lookups for all LTR scripts under DFLT # so that the basic composer, without a language selected, will still kern. @@ -785,7 +790,9 @@ def _registerLookups( lookupsLTR.extend(scriptLookups.values()) elif script_horizontal_direction(script, "LTR") == "RTL": lookupsRTL.extend(scriptLookups.values()) - dfltLookups.extend(lookupsLTR or lookupsRTL) + dfltLookups.extend( + lkp for lkp in (lookupsLTR or lookupsRTL) if lkp not in dfltLookups + ) if dfltLookups: languages = feaLanguagesByScript.get("DFLT", ["dflt"]) @@ -814,25 +821,34 @@ def _registerLookups( feature.statements.append(ast.Comment("")) # We have something for this script. First add the default # lookups, then the script-specific ones - lookupsForThisScript = [] + lookupsForThisScript = {} for dfltScript in DFLT_SCRIPTS: if dfltScript in lookups: - lookupsForThisScript.extend(lookups[dfltScript].values()) - lookupsForThisScript.extend(lookups[script].values()) + lookupsForThisScript.update(lookups[dfltScript]) + lookupsForThisScript.update(lookups[script]) # Register the lookups for all languages defined in the feature # file for the script, otherwise kerning is not applied if any # language is set at all. languages = feaLanguagesByScript.get(tag, ["dflt"]) - ast.addLookupReferences(feature, lookupsForThisScript, tag, languages) + ast.addLookupReferences( + feature, lookupsForThisScript.values(), tag, languages + ) def splitKerning(pairs, glyphScripts): # Split kerning into per-script buckets, so we can post-process them before - # continuing. + # continuing. Scripts that have cross-script kerning pairs will be put in + # the same bucket. kerningPerScript = {} for pair in pairs: - for script, splitPair in partitionByScript(pair, glyphScripts): - kerningPerScript.setdefault(script, []).append(splitPair) + for scripts, splitPair in partitionByScript(pair, glyphScripts): + scripts = tuple(sorted(scripts)) + kerningPerScript.setdefault(scripts, []).append(splitPair) + + # Merge buckets that have common scripts. If we have [A, B], [B, C], and + # [D] buckets, we want to merge the first two into [A, B, C] and leave [D] + # so that all kerning pairs of the three scripts are in the same lookup. + kerningPerScript = mergeScripts(kerningPerScript) for pairs in kerningPerScript.values(): pairs.sort() @@ -847,70 +863,113 @@ def partitionByScript( """Split a potentially mixed-script pair into pairs that make sense based on the dominant script, and yield each combination with its dominant script.""" - side1Scripts: dict[str, set[str]] = {} - side2Scripts: dict[str, set[str]] = {} + def script_direction(script: str) -> str: + if script == COMMON_SCRIPT: + return "Auto" + return script_horizontal_direction(script, "LTR") + + side1Directions: dict[str, set[str]] = {} + side2Directions: dict[str, set[str]] = {} + resolvedScripts: dict[str, set[str]] = {} for glyph in pair.firstGlyphs: - scripts = glyphScripts.get(glyph, DFLT_SCRIPTS) - # If a glyph is both common or inherited *and* another script, treat it - # as just common (throwing Zyyy and Zinh into the same bucket for - # simplicity). This ensures that a pair appears to the shaper exactly - # once, as long as every script sees at most 2 lookups (or 3 with mark - # lookups, but they contain distinct pairs), the common one and the - # script-specific one. - if scripts & DFLT_SCRIPTS: - scripts = COMMON_SCRIPTS_SET - for script in scripts: - side1Scripts.setdefault(script, set()).add(glyph) + side1Scripts = glyphScripts.get(glyph, DFLT_SCRIPTS) + # Treat Inherited as Common. + if side1Scripts & DFLT_SCRIPTS: + side1Scripts = COMMON_SCRIPTS_SET + resolvedScripts[glyph] = side1Scripts + side1Scripts = sorted(side1Scripts) + directions = [script_direction(script) for script in side1Scripts] + for direction in directions: + side1Directions.setdefault(direction, set()).add(glyph) for glyph in pair.secondGlyphs: - scripts = glyphScripts.get(glyph, DFLT_SCRIPTS) - if scripts & DFLT_SCRIPTS: - scripts = COMMON_SCRIPTS_SET - for script in scripts: - side2Scripts.setdefault(script, set()).add(glyph) - - for firstScript, secondScript in itertools.product(side1Scripts, side2Scripts): - # Preserve the type (glyph or class) of each side. - localGlyphs: set[str] = set() + side1Scripts = glyphScripts.get(glyph, DFLT_SCRIPTS) + # Treat Inherited as Common. + if side1Scripts & DFLT_SCRIPTS: + side1Scripts = COMMON_SCRIPTS_SET + resolvedScripts[glyph] = side1Scripts + side1Scripts = sorted(side1Scripts) + directions = [script_direction(script) for script in side1Scripts] + for direction in directions: + side2Directions.setdefault(direction, set()).add(glyph) + + for side1Direction, side2Direction in itertools.product( + side1Directions, side2Directions + ): localSide1: str | tuple[str, ...] localSide2: str | tuple[str, ...] + side1Scripts: set[str] = set() + side2Scripts: set[str] = set() if pair.firstIsClass: - localSide1 = tuple(sorted(side1Scripts[firstScript])) - localGlyphs.update(localSide1) + localSide1 = tuple(sorted(side1Directions[side1Direction])) + for glyph in localSide1: + side1Scripts |= resolvedScripts[glyph] else: - assert len(side1Scripts[firstScript]) == 1 - (localSide1,) = side1Scripts[firstScript] - localGlyphs.add(localSide1) + assert len(side1Directions[side1Direction]) == 1 + (localSide1,) = side1Directions[side1Direction] + side1Scripts |= resolvedScripts[localSide1] if pair.secondIsClass: - localSide2 = tuple(sorted(side2Scripts[secondScript])) - localGlyphs.update(localSide2) - else: - assert len(side2Scripts[secondScript]) == 1 - (localSide2,) = side2Scripts[secondScript] - localGlyphs.add(localSide2) - - if firstScript == secondScript or secondScript == COMMON_SCRIPT: - localScript = firstScript - elif firstScript == COMMON_SCRIPT: - localScript = secondScript - # Two different explicit scripts: + localSide2 = tuple(sorted(side2Directions[side2Direction])) + for glyph in localSide2: + side2Scripts |= resolvedScripts[glyph] else: + assert len(side2Directions[side2Direction]) == 1 + (localSide2,) = side2Directions[side2Direction] + side2Scripts |= resolvedScripts[localSide2] + + # Skip pairs with mixed direction. + if side1Direction != side2Direction and not any( + side == "Auto" for side in (side1Direction, side2Direction) + ): LOGGER.info( - "Skipping kerning pair <%s %s %s> with mixed script (%s, %s)", + "Skipping kerning pair <%s %s %s> with mixed direction (%s, %s)", pair.side1, pair.side2, pair.value, - firstScript, - secondScript, + side1Direction, + side2Direction, ) continue - yield localScript, KerningPair( + scripts = side1Scripts | side2Scripts + # If only one side has Common, drop it + if not all(side & COMMON_SCRIPTS_SET for side in (side1Scripts, side2Scripts)): + scripts -= COMMON_SCRIPTS_SET + + yield scripts, KerningPair( localSide1, localSide2, pair.value, ) +def mergeScripts(kerningPerScript): + # https://stackoverflow.com/a/9112588 + sets = [set(lst) for lst in kerningPerScript if lst] + merged = True + while merged: + merged = False + result = [] + while sets: + common, rest = sets[0], sets[1:] + sets = [] + for x in rest: + if x.isdisjoint(common): + sets.append(x) + else: + merged = True + common |= x + result.append(common) + sets = result + + result = {tuple(sorted(scripts)): [] for scripts in sets} + for scripts, pairs in kerningPerScript.items(): + for scripts2 in sets: + if scripts2 & set(scripts): + result[tuple(sorted(scripts2))].extend(pairs) + break + return result + + def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None): # Note: Refer to the context for existing classDefs and mappings of glyph # class tuples to feaLib AST to avoid overwriting existing class names, @@ -929,44 +988,9 @@ def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None): classNames = set() classNames.update(context.kerning.classDefs.keys()) - # Generate common class names first so that common classes are correctly - # named in other lookups. - if COMMON_SCRIPT in kerningPerScript: - common_pairs = kerningPerScript[COMMON_SCRIPT] - for pair in common_pairs: - if ( - pair.firstIsClass - and pair.side1 not in existingSide1Classes - and pair.side1 not in newSide1Classes - ): - addClassDefinition( - "kern1", - pair.side1, - newSide1Classes, - side1Membership, - newClassDefs, - classNames, - COMMON_CLASS_NAME, - ) - if ( - pair.secondIsClass - and pair.side2 not in existingSide2Classes - and pair.side2 not in newSide2Classes - ): - addClassDefinition( - "kern2", - pair.side2, - newSide2Classes, - side2Membership, - newClassDefs, - classNames, - COMMON_CLASS_NAME, - ) - sortedKerningPerScript = sorted(kerningPerScript.items()) - for script, pairs in sortedKerningPerScript: - if script == COMMON_SCRIPT: - continue + for scripts, pairs in sortedKerningPerScript: + name = "_".join(scripts).replace(COMMON_SCRIPT, COMMON_CLASS_NAME) for pair in pairs: if ( pair.firstIsClass @@ -980,7 +1004,7 @@ def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None): side1Membership, newClassDefs, classNames, - script, + name, ) if ( pair.secondIsClass @@ -994,7 +1018,7 @@ def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None): side2Membership, newClassDefs, classNames, - script, + name, ) return newClassDefs, newSide1Classes, newSide2Classes