Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cross script kerning #811

Merged
merged 8 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 135 additions & 72 deletions Lib/ufo2ft/featureWriters/kernFeatureWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ def unicodeBidiType(uv):
return None


def script_direction(script: str) -> str:
    """Return the horizontal direction label used for *script*.

    ``COMMON_SCRIPT`` is reported as ``"Auto"``. Any other script is resolved
    by ``script_horizontal_direction``, called with ``"LTR"`` as its second
    argument.
    """
    return (
        "Auto"
        if script == COMMON_SCRIPT
        else script_horizontal_direction(script, "LTR")
    )


@dataclass(frozen=True, order=False)
class KerningPair:
__slots__ = ("side1", "side2", "value")
Expand Down Expand Up @@ -157,24 +163,26 @@ class KernFeatureWriter(BaseFeatureWriter):
second lookup without the ignore marks flag.
* Go through all kerning pairs and split them up by script, to put them in
different lookups. This reduces the size of each lookup compared to
splitting by direction, as previously done.
splitting by direction, as previously done. If there are kerning pairs
with different scripts on each side, these scripts are all kept together
to allow for cross-script kerning (in implementations that apply it).
khaledhosny marked this conversation as resolved.
Show resolved Hide resolved
Scripts with different direction are always split.
* Partition the first and second side of a pair by script and emit only
those with the same script (e.g. `a` and `b` are both "Latn", `period`
and `period` are both "Default", but `a` and `a-cy` would mix "Latn"
and "Cyrl" and are dropped) or those that kern an explicit against a
"common" or "inherited" script, e.g. `a` and `period`.
and "Cyrl" and are dropped), or those with kerning across them, or
those that kern an explicit against a "common" or "inherited" script
(e.g. `a` and `period`).
* Glyphs can have multiple scripts assigned to them (legitimately, e.g.
U+0951 DEVANAGARI STRESS SIGN UDATTA, or for random reasons like
having both `sub h by h.sc` and `sub Etaprosgegrammeni by h.sc;`).
Only scripts that were determined earlier to be supported by the font
will be considered. Usually, we will emit pairs where both sides have
the same script and no splitting is necessary. The only mixed script
pairs we emit are common or inherited (Zyyy or Zinh) against explicit
(e.g. Latn) scripts. A glyph can be part of both for weird reasons, so
we always treat any glyph with a common or inherited script as a
purely common (not inherited) glyph for bucketing purposes. This
avoids creating overlapping groups with the multi-script glyph in a
lookup.
the same script and no splitting is necessary. A glyph can be part of
both a common or inherited script and an explicit script for weird
reasons, so we always treat any glyph with a common or
inherited script as a purely common (not inherited) glyph for
bucketing purposes. This avoids creating overlapping groups with the
multi-script glyph in a lookup.
* Some glyphs may have a script of Zyyy or Zinh but have a disjoint set
of explicit scripts as their script extension. By looking only at the
script extension, we treat many of them as being part of an explicit
Expand Down Expand Up @@ -291,7 +299,9 @@ def _write(self):

lookupGroups = []
for _, lookupGroup in sorted(lookups.items()):
lookupGroups.extend(lookupGroup.values())
lookupGroups.extend(
lkp for lkp in lookupGroup.values() if lkp not in lookupGroups
)

# NOTE: We don't write classDefs because we literalise all classes.
self._insert(
Expand Down Expand Up @@ -700,18 +710,11 @@ def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix="
assert not side2Classes.keys() & newSide2Classes.keys()
side2Classes.update(newSide2Classes)

for script, pairs in kerningPerScript.items():
scriptLookups = lookups.setdefault(script, {})

key = f"kern_{script}{suffix}"
lookup = scriptLookups.get(key)
if not lookup:
# For neatness:
lookup = self._makeKerningLookup(
key.replace(COMMON_SCRIPT, COMMON_CLASS_NAME),
ignoreMarks=ignoreMarks,
)
scriptLookups[key] = lookup
for scripts, pairs in kerningPerScript.items():
lookupName = f"kern_{'_'.join(scripts)}{suffix}".replace(
COMMON_SCRIPT, COMMON_CLASS_NAME
)
lookup = self._makeKerningLookup(lookupName, ignoreMarks=ignoreMarks)
for pair in pairs:
bidiTypes = {
direction
Expand All @@ -726,13 +729,17 @@ def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix="
pair.value,
)
continue
scriptIsRtl = script_horizontal_direction(script, "LTR") == "RTL"
directions = {script_direction(script) for script in scripts}
assert len(directions) == 1
scriptIsRtl = directions == {"RTL"}
# Numbers are always shaped LTR even in RTL scripts:
pairIsRtl = scriptIsRtl and "L" not in bidiTypes
rule = self._makePairPosRule(
pair, side1Classes, side2Classes, pairIsRtl
)
lookup.statements.append(rule)
for script in scripts:
lookups.setdefault(script, {})[lookupName] = lookup
anthrotype marked this conversation as resolved.
Show resolved Hide resolved

# Clean out empty lookups.
for script, scriptLookups in list(lookups.items()):
Expand Down Expand Up @@ -771,7 +778,9 @@ def _registerLookups(
isKernBlock = feature.name == "kern"
dfltLookups: list[ast.LookupBlock] = []
if isKernBlock and COMMON_SCRIPT in lookups:
dfltLookups.extend(lookups[COMMON_SCRIPT].values())
dfltLookups.extend(
lkp for lkp in lookups[COMMON_SCRIPT].values() if lkp not in dfltLookups
)

# InDesign bugfix: register kerning lookups for all LTR scripts under DFLT
# so that the basic composer, without a language selected, will still kern.
Expand All @@ -780,12 +789,14 @@ def _registerLookups(
lookupsLTR: list[ast.LookupBlock] = []
lookupsRTL: list[ast.LookupBlock] = []
for script, scriptLookups in sorted(lookups.items()):
if script != COMMON_SCRIPT and script not in DIST_ENABLED_SCRIPTS:
if script_horizontal_direction(script, "LTR") == "LTR":
if script not in DIST_ENABLED_SCRIPTS:
if script_direction(script) == "LTR":
lookupsLTR.extend(scriptLookups.values())
elif script_horizontal_direction(script, "LTR") == "RTL":
elif script_direction(script) == "RTL":
lookupsRTL.extend(scriptLookups.values())
dfltLookups.extend(lookupsLTR or lookupsRTL)
dfltLookups.extend(
lkp for lkp in (lookupsLTR or lookupsRTL) if lkp not in dfltLookups
)

if dfltLookups:
languages = feaLanguagesByScript.get("DFLT", ["dflt"])
Expand Down Expand Up @@ -814,25 +825,31 @@ def _registerLookups(
feature.statements.append(ast.Comment(""))
# We have something for this script. First add the default
# lookups, then the script-specific ones
lookupsForThisScript = []
lookupsForThisScript = {}
for dfltScript in DFLT_SCRIPTS:
if dfltScript in lookups:
lookupsForThisScript.extend(lookups[dfltScript].values())
lookupsForThisScript.extend(lookups[script].values())
lookupsForThisScript.update(lookups[dfltScript])
lookupsForThisScript.update(lookups[script])
# Register the lookups for all languages defined in the feature
# file for the script, otherwise kerning is not applied if any
# language is set at all.
languages = feaLanguagesByScript.get(tag, ["dflt"])
ast.addLookupReferences(feature, lookupsForThisScript, tag, languages)
ast.addLookupReferences(
feature, lookupsForThisScript.values(), tag, languages
)


def splitKerning(pairs, glyphScripts):
# Split kerning into per-script buckets, so we can post-process them before
# continuing.
# continuing. Scripts that have cross-script kerning pairs will be put in
# the same bucket.
kerningPerScript = {}
for pair in pairs:
for script, splitPair in partitionByScript(pair, glyphScripts):
kerningPerScript.setdefault(script, []).append(splitPair)
for scripts, splitPair in partitionByScript(pair, glyphScripts):
scripts = tuple(sorted(scripts))
kerningPerScript.setdefault(scripts, []).append(splitPair)

kerningPerScript = mergeScripts(kerningPerScript)

for pairs in kerningPerScript.values():
pairs.sort()
Expand All @@ -847,8 +864,9 @@ def partitionByScript(
"""Split a potentially mixed-script pair into pairs that make sense based
on the dominant script, and yield each combination with its dominant script."""

side1Scripts: dict[str, set[str]] = {}
side2Scripts: dict[str, set[str]] = {}
side1Directions: dict[str, set[str]] = {}
side2Directions: dict[str, set[str]] = {}
resolvedScripts: dict[str, set[str]] = {}
for glyph in pair.firstGlyphs:
scripts = glyphScripts.get(glyph, DFLT_SCRIPTS)
# If a glyph is both common or inherited *and* another script, treat it
Expand All @@ -859,58 +877,101 @@ def partitionByScript(
# script-specific one.
if scripts & DFLT_SCRIPTS:
scripts = COMMON_SCRIPTS_SET
for script in scripts:
side1Scripts.setdefault(script, set()).add(glyph)
resolvedScripts[glyph] = scripts
scripts = sorted(scripts)
directions = [script_direction(script) for script in scripts]
for direction in directions:
khaledhosny marked this conversation as resolved.
Show resolved Hide resolved
side1Directions.setdefault(direction, set()).add(glyph)
for glyph in pair.secondGlyphs:
scripts = glyphScripts.get(glyph, DFLT_SCRIPTS)
if scripts & DFLT_SCRIPTS:
scripts = COMMON_SCRIPTS_SET
for script in scripts:
side2Scripts.setdefault(script, set()).add(glyph)

for firstScript, secondScript in itertools.product(side1Scripts, side2Scripts):
# Preserve the type (glyph or class) of each side.
localGlyphs: set[str] = set()
resolvedScripts[glyph] = scripts
scripts = sorted(scripts)
directions = [script_direction(script) for script in scripts]
for direction in directions:
khaledhosny marked this conversation as resolved.
Show resolved Hide resolved
side2Directions.setdefault(direction, set()).add(glyph)

for side1Direction, side2Direction in itertools.product(
side1Directions, side2Directions
):
localSide1: str | tuple[str, ...]
localSide2: str | tuple[str, ...]
side1Scripts: set[str] = set()
side2Scripts: set[str] = set()
if pair.firstIsClass:
localSide1 = tuple(sorted(side1Scripts[firstScript]))
localGlyphs.update(localSide1)
localSide1 = tuple(sorted(side1Directions[side1Direction]))
for glyph in localSide1:
side1Scripts |= resolvedScripts[glyph]
else:
assert len(side1Scripts[firstScript]) == 1
(localSide1,) = side1Scripts[firstScript]
localGlyphs.add(localSide1)
assert len(side1Directions[side1Direction]) == 1
(localSide1,) = side1Directions[side1Direction]
side1Scripts |= resolvedScripts[localSide1]
if pair.secondIsClass:
localSide2 = tuple(sorted(side2Scripts[secondScript]))
localGlyphs.update(localSide2)
else:
assert len(side2Scripts[secondScript]) == 1
(localSide2,) = side2Scripts[secondScript]
localGlyphs.add(localSide2)

if firstScript == secondScript or secondScript == COMMON_SCRIPT:
localScript = firstScript
elif firstScript == COMMON_SCRIPT:
localScript = secondScript
# Two different explicit scripts:
localSide2 = tuple(sorted(side2Directions[side2Direction]))
for glyph in localSide2:
side2Scripts |= resolvedScripts[glyph]
else:
assert len(side2Directions[side2Direction]) == 1
(localSide2,) = side2Directions[side2Direction]
side2Scripts |= resolvedScripts[localSide2]

# Skip pairs with mixed direction.
if side1Direction != side2Direction and not any(
side == "Auto" for side in (side1Direction, side2Direction)
anthrotype marked this conversation as resolved.
Show resolved Hide resolved
):
LOGGER.info(
"Skipping kerning pair <%s %s %s> with mixed script (%s, %s)",
"Skipping kerning pair <%s %s %s> with mixed direction (%s, %s)",
pair.side1,
pair.side2,
pair.value,
firstScript,
secondScript,
side1Direction,
side2Direction,
)
continue

yield localScript, KerningPair(
scripts = side1Scripts | side2Scripts
# If only one side has Common, drop it
if not all(side & COMMON_SCRIPTS_SET for side in (side1Scripts, side2Scripts)):
scripts -= COMMON_SCRIPTS_SET

yield scripts, KerningPair(
localSide1,
localSide2,
pair.value,
)


def mergeScripts(kerningPerScript):
    """Merge buckets that have common scripts.

    If we have [A, B], [B, C], and [D] buckets, we want to merge the first two
    into [A, B, C] and leave [D] so that all kerning pairs of the three scripts
    are in the same lookup.

    Args:
        kerningPerScript: Mapping whose keys are iterables of script names
            (tuples in the caller visible here) and whose values are lists of
            kerning pairs.

    Returns:
        A new dict keyed by the sorted tuple of every merged script group.
        Each value is a new list holding the pairs of all input buckets that
        share a script with that group, in input iteration order. Buckets
        with an empty key contribute no group and their pairs are not carried
        over. The input mapping and its lists are not modified.
    """
    # Work on fresh sets so that the in-place unions below never touch the
    # caller's keys. Empty keys cannot overlap with anything and are skipped.
    groups = [set(scripts) for scripts in kerningPerScript if scripts]

    # One sweep can leave overlaps behind: a group that was disjoint from the
    # head when it was inspected may overlap once the head has absorbed a
    # later group. Repeat the sweep until nothing merges any more.
    merged = True
    while merged:
        merged = False
        settled = []
        while groups:
            head = groups[0]
            remaining = []
            for candidate in groups[1:]:
                if head.isdisjoint(candidate):
                    remaining.append(candidate)
                else:
                    head |= candidate
                    merged = True
            settled.append(head)
            groups = remaining
        groups = settled

    # The groups are now pairwise disjoint, so each original bucket overlaps
    # with exactly one of them. Compute each group's key once.
    keyedGroups = [(group, tuple(sorted(group))) for group in groups]
    result = {key: [] for _, key in keyedGroups}
    for scripts, pairs in kerningPerScript.items():
        scriptSet = set(scripts)
        for group, key in keyedGroups:
            if group & scriptSet:
                result[key].extend(pairs)
                break
    return result


def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None):
# Note: Refer to the context for existing classDefs and mappings of glyph
# class tuples to feaLib AST to avoid overwriting existing class names,
Expand All @@ -931,9 +992,10 @@ def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None):

# Generate common class names first so that common classes are correctly
# named in other lookups.
if COMMON_SCRIPT in kerningPerScript:
common_pairs = kerningPerScript[COMMON_SCRIPT]
for pair in common_pairs:
for scripts, pairs in kerningPerScript.items():
if set(scripts) != COMMON_SCRIPTS_SET:
continue
anthrotype marked this conversation as resolved.
Show resolved Hide resolved
for pair in pairs:
if (
pair.firstIsClass
and pair.side1 not in existingSide1Classes
Expand Down Expand Up @@ -964,9 +1026,10 @@ def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None):
)

sortedKerningPerScript = sorted(kerningPerScript.items())
for script, pairs in sortedKerningPerScript:
if script == COMMON_SCRIPT:
for scripts, pairs in sortedKerningPerScript:
if set(scripts) == COMMON_SCRIPTS_SET:
continue
script = "_".join(scripts).replace(COMMON_SCRIPT, COMMON_CLASS_NAME)
for pair in pairs:
if (
pair.firstIsClass
Expand Down
Loading
Loading