From 49b2d42be17bdd43c37c176cc9be34d5a9988e71 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Sun, 21 Jan 2024 19:19:57 +0200
Subject: [PATCH 1/8] Allow cross-script kerning

Several implementations can apply kerning cross-script, so instead of
splitting cross-script kerning pairs, we now keep all kerning for
scripts which have common kerning pairs in one lookup and split the rest.
---
 .../featureWriters/kernFeatureWriter.py       | 202 ++++++++++++------
 1 file changed, 133 insertions(+), 69 deletions(-)

diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
index 0713bb8f..d5def86f 100644
--- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
@@ -157,24 +157,26 @@ class KernFeatureWriter(BaseFeatureWriter):
       second lookup without the ignore marks flag.
     * Go through all kerning pairs and split them up by script, to put them in
       different lookups. This reduces the size of each lookup compared to
-      splitting by direction, as previously done.
+      splitting by direction, as previously done. If there are kerning pairs
+      with different scripts on each side, theese scripts are all kept together
+      to allow for cross-script kerning (in implmenetations that apply it).
+      Scripts with different direction are always split.
         * Partition the first and second side of a pair by script and emit only
           those with the same script (e.g. `a` and `b` are both "Latn", `period`
           and `period` are both "Default", but `a` and `a-cy` would mix "Latn"
-          and "Cyrl" and are dropped) or those that kern an explicit against a
-          "common" or "inherited" script, e.g. `a` and `period`.
+          and "Cyrl" and are dropped), or those with kerning across them, or
+          those that kern an explicit against a "common" or "inherited" script
+          (e.g. `a` and `period`).
         * Glyphs can have multiple scripts assigned to them (legitimately, e.g.
           U+0951 DEVANAGARI STRESS SIGN UDATTA, or for random reasons like
           having both `sub h by h.sc` and `sub Etaprosgegrammeni by h.sc;`).
           Only scripts that were determined earlier to be supported by the font
           will be considered. Usually, we will emit pairs where both sides have
-          the same script and no splitting is necessary. The only mixed script
-          pairs we emit are common or inherited (Zyyy or Zinh) against explicit
-          (e.g. Latn) scripts. A glyph can be part of both for weird reasons, so
-          we always treat any glyph with a common or inherited script as a
-          purely common (not inherited) glyph for bucketing purposes. This
-          avoids creating overlapping groups with the multi-script glyph in a
-          lookup.
+          the same script and no splitting is necessary. A glyph can be part of
+          both for weird reasons, so we always treat any glyph with a common or
+          inherited script as a purely common (not inherited) glyph for
+          bucketing purposes. This avoids creating overlapping groups with the
+          multi-script glyph in a lookup.
         * Some glyphs may have a script of Zyyy or Zinh but have a disjoint set
           of explicit scripts as their script extension. By looking only at the
           script extension, we treat many of them as being part of an explicit
@@ -291,7 +293,9 @@ def _write(self):
 
         lookupGroups = []
         for _, lookupGroup in sorted(lookups.items()):
-            lookupGroups.extend(lookupGroup.values())
+            lookupGroups.extend(
+                lkp for lkp in lookupGroup.values() if lkp not in lookupGroups
+            )
 
         # NOTE: We don't write classDefs because we literalise all classes.
         self._insert(
@@ -700,18 +704,11 @@ def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix="
         assert not side2Classes.keys() & newSide2Classes.keys()
         side2Classes.update(newSide2Classes)
 
-        for script, pairs in kerningPerScript.items():
-            scriptLookups = lookups.setdefault(script, {})
-
-            key = f"kern_{script}{suffix}"
-            lookup = scriptLookups.get(key)
-            if not lookup:
-                # For neatness:
-                lookup = self._makeKerningLookup(
-                    key.replace(COMMON_SCRIPT, COMMON_CLASS_NAME),
-                    ignoreMarks=ignoreMarks,
-                )
-                scriptLookups[key] = lookup
+        for scripts, pairs in kerningPerScript.items():
+            lookupName = f"kern_{'_'.join(scripts)}{suffix}".replace(
+                COMMON_SCRIPT, COMMON_CLASS_NAME
+            )
+            lookup = self._makeKerningLookup(lookupName, ignoreMarks=ignoreMarks)
             for pair in pairs:
                 bidiTypes = {
                     direction
@@ -726,13 +723,19 @@ def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix="
                         pair.value,
                     )
                     continue
-                scriptIsRtl = script_horizontal_direction(script, "LTR") == "RTL"
+                directions = {
+                    script_horizontal_direction(script, "LTR") for script in scripts
+                }
+                assert len(directions) == 1
+                scriptIsRtl = directions == {"RTL"}
                 # Numbers are always shaped LTR even in RTL scripts:
                 pairIsRtl = scriptIsRtl and "L" not in bidiTypes
                 rule = self._makePairPosRule(
                     pair, side1Classes, side2Classes, pairIsRtl
                 )
                 lookup.statements.append(rule)
+            for script in scripts:
+                lookups.setdefault(script, {})[lookupName] = lookup
 
         # Clean out empty lookups.
         for script, scriptLookups in list(lookups.items()):
@@ -771,7 +774,9 @@ def _registerLookups(
         isKernBlock = feature.name == "kern"
         dfltLookups: list[ast.LookupBlock] = []
         if isKernBlock and COMMON_SCRIPT in lookups:
-            dfltLookups.extend(lookups[COMMON_SCRIPT].values())
+            dfltLookups.extend(
+                lkp for lkp in lookups[COMMON_SCRIPT].values() if lkp not in dfltLookups
+            )
 
         # InDesign bugfix: register kerning lookups for all LTR scripts under DFLT
         # so that the basic composer, without a language selected, will still kern.
@@ -785,7 +790,9 @@ def _registerLookups(
                         lookupsLTR.extend(scriptLookups.values())
                     elif script_horizontal_direction(script, "LTR") == "RTL":
                         lookupsRTL.extend(scriptLookups.values())
-            dfltLookups.extend(lookupsLTR or lookupsRTL)
+            dfltLookups.extend(
+                lkp for lkp in (lookupsLTR or lookupsRTL) if lkp not in dfltLookups
+            )
 
         if dfltLookups:
             languages = feaLanguagesByScript.get("DFLT", ["dflt"])
@@ -814,25 +821,31 @@ def _registerLookups(
                     feature.statements.append(ast.Comment(""))
                 # We have something for this script. First add the default
                 # lookups, then the script-specific ones
-                lookupsForThisScript = []
+                lookupsForThisScript = {}
                 for dfltScript in DFLT_SCRIPTS:
                     if dfltScript in lookups:
-                        lookupsForThisScript.extend(lookups[dfltScript].values())
-                lookupsForThisScript.extend(lookups[script].values())
+                        lookupsForThisScript.update(lookups[dfltScript])
+                lookupsForThisScript.update(lookups[script])
                 # Register the lookups for all languages defined in the feature
                 # file for the script, otherwise kerning is not applied if any
                 # language is set at all.
                 languages = feaLanguagesByScript.get(tag, ["dflt"])
-                ast.addLookupReferences(feature, lookupsForThisScript, tag, languages)
+                ast.addLookupReferences(
+                    feature, lookupsForThisScript.values(), tag, languages
+                )
 
 
 def splitKerning(pairs, glyphScripts):
     # Split kerning into per-script buckets, so we can post-process them before
-    # continuing.
+    # continuing. Scripts that have cross-script kerning pairs will be put in
+    # the same bucket.
     kerningPerScript = {}
     for pair in pairs:
-        for script, splitPair in partitionByScript(pair, glyphScripts):
-            kerningPerScript.setdefault(script, []).append(splitPair)
+        for scripts, splitPair in partitionByScript(pair, glyphScripts):
+            scripts = tuple(sorted(scripts))
+            kerningPerScript.setdefault(scripts, []).append(splitPair)
+
+    kerningPerScript = mergeScripts(kerningPerScript)
 
     for pairs in kerningPerScript.values():
         pairs.sort()
@@ -847,8 +860,14 @@ def partitionByScript(
     """Split a potentially mixed-script pair into pairs that make sense based
     on the dominant script, and yield each combination with its dominant script."""
 
-    side1Scripts: dict[str, set[str]] = {}
-    side2Scripts: dict[str, set[str]] = {}
+    def script_direction(script: str) -> str:
+        if script == COMMON_SCRIPT:
+            return "Auto"
+        return script_horizontal_direction(script, "LTR")
+
+    side1Directions: dict[str, set[str]] = {}
+    side2Directions: dict[str, set[str]] = {}
+    resolvedScripts: dict[str, set[str]] = {}
     for glyph in pair.firstGlyphs:
         scripts = glyphScripts.get(glyph, DFLT_SCRIPTS)
         # If a glyph is both common or inherited *and* another script, treat it
@@ -859,58 +878,101 @@ def partitionByScript(
         # script-specific one.
         if scripts & DFLT_SCRIPTS:
             scripts = COMMON_SCRIPTS_SET
-        for script in scripts:
-            side1Scripts.setdefault(script, set()).add(glyph)
+        resolvedScripts[glyph] = scripts
+        scripts = sorted(scripts)
+        directions = [script_direction(script) for script in scripts]
+        for direction in directions:
+            side1Directions.setdefault(direction, set()).add(glyph)
     for glyph in pair.secondGlyphs:
         scripts = glyphScripts.get(glyph, DFLT_SCRIPTS)
         if scripts & DFLT_SCRIPTS:
             scripts = COMMON_SCRIPTS_SET
-        for script in scripts:
-            side2Scripts.setdefault(script, set()).add(glyph)
-
-    for firstScript, secondScript in itertools.product(side1Scripts, side2Scripts):
-        # Preserve the type (glyph or class) of each side.
-        localGlyphs: set[str] = set()
+        resolvedScripts[glyph] = scripts
+        scripts = sorted(scripts)
+        directions = [script_direction(script) for script in scripts]
+        for direction in directions:
+            side2Directions.setdefault(direction, set()).add(glyph)
+
+    for side1Direction, side2Direction in itertools.product(
+        side1Directions, side2Directions
+    ):
         localSide1: str | tuple[str, ...]
         localSide2: str | tuple[str, ...]
+        side1Scripts: set[str] = set()
+        side2Scripts: set[str] = set()
         if pair.firstIsClass:
-            localSide1 = tuple(sorted(side1Scripts[firstScript]))
-            localGlyphs.update(localSide1)
+            localSide1 = tuple(sorted(side1Directions[side1Direction]))
+            for glyph in localSide1:
+                side1Scripts |= resolvedScripts[glyph]
         else:
-            assert len(side1Scripts[firstScript]) == 1
-            (localSide1,) = side1Scripts[firstScript]
-            localGlyphs.add(localSide1)
+            assert len(side1Directions[side1Direction]) == 1
+            (localSide1,) = side1Directions[side1Direction]
+            side1Scripts |= resolvedScripts[localSide1]
         if pair.secondIsClass:
-            localSide2 = tuple(sorted(side2Scripts[secondScript]))
-            localGlyphs.update(localSide2)
-        else:
-            assert len(side2Scripts[secondScript]) == 1
-            (localSide2,) = side2Scripts[secondScript]
-            localGlyphs.add(localSide2)
-
-        if firstScript == secondScript or secondScript == COMMON_SCRIPT:
-            localScript = firstScript
-        elif firstScript == COMMON_SCRIPT:
-            localScript = secondScript
-        # Two different explicit scripts:
+            localSide2 = tuple(sorted(side2Directions[side2Direction]))
+            for glyph in localSide2:
+                side2Scripts |= resolvedScripts[glyph]
         else:
+            assert len(side2Directions[side2Direction]) == 1
+            (localSide2,) = side2Directions[side2Direction]
+            side2Scripts |= resolvedScripts[localSide2]
+
+        # Skip pairs with mixed direction.
+        if side1Direction != side2Direction and not any(
+            side == "Auto" for side in (side1Direction, side2Direction)
+        ):
             LOGGER.info(
-                "Skipping kerning pair <%s %s %s> with mixed script (%s, %s)",
+                "Skipping kerning pair <%s %s %s> with mixed direction (%s, %s)",
                 pair.side1,
                 pair.side2,
                 pair.value,
-                firstScript,
-                secondScript,
+                side1Direction,
+                side2Direction,
             )
             continue
 
-        yield localScript, KerningPair(
+        scripts = side1Scripts | side2Scripts
+        # If only one side has Common, drop it
+        if not all(side & COMMON_SCRIPTS_SET for side in (side1Scripts, side2Scripts)):
+            scripts -= COMMON_SCRIPTS_SET
+
+        yield scripts, KerningPair(
             localSide1,
             localSide2,
             pair.value,
         )
 
 
+def mergeScripts(kerningPerScript):
+    """Merge buckets that have common scripts. If we have [A, B], [B, C], and
+    [D] buckets, we want to merge the first two into [A, B, C] and leave [D] so
+    that all kerning pairs of the three scripts are in the same lookup."""
+    sets = [set(scripts) for scripts in kerningPerScript if scripts]
+    merged = True
+    while merged:
+        merged = False
+        result = []
+        while sets:
+            common, rest = sets[0], sets[1:]
+            sets = []
+            for scripts in rest:
+                if scripts.isdisjoint(common):
+                    sets.append(scripts)
+                else:
+                    merged = True
+                    common |= scripts
+            result.append(common)
+        sets = result
+
+    result = {tuple(sorted(scripts)): [] for scripts in sets}
+    for scripts, pairs in kerningPerScript.items():
+        for scripts2 in sets:
+            if scripts2 & set(scripts):
+                result[tuple(sorted(scripts2))].extend(pairs)
+                break
+    return result
+
+
 def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None):
     # Note: Refer to the context for existing classDefs and mappings of glyph
     # class tuples to feaLib AST to avoid overwriting existing class names,
@@ -931,9 +993,10 @@ def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None):
 
     # Generate common class names first so that common classes are correctly
     # named in other lookups.
-    if COMMON_SCRIPT in kerningPerScript:
-        common_pairs = kerningPerScript[COMMON_SCRIPT]
-        for pair in common_pairs:
+    for scripts, pairs in kerningPerScript.items():
+        if set(scripts) != COMMON_SCRIPTS_SET:
+            continue
+        for pair in pairs:
             if (
                 pair.firstIsClass
                 and pair.side1 not in existingSide1Classes
@@ -964,9 +1027,10 @@ def makeAllGlyphClassDefinitions(kerningPerScript, context, feaFile=None):
                 )
 
     sortedKerningPerScript = sorted(kerningPerScript.items())
-    for script, pairs in sortedKerningPerScript:
-        if script == COMMON_SCRIPT:
+    for scripts, pairs in sortedKerningPerScript:
+        if set(scripts) == COMMON_SCRIPTS_SET:
             continue
+        script = "_".join(scripts).replace(COMMON_SCRIPT, COMMON_CLASS_NAME)
         for pair in pairs:
             if (
                 pair.firstIsClass

From 68d49a8d0a4bc937662016f8ef58b7441877dbbc Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Sun, 21 Jan 2024 19:21:24 +0200
Subject: [PATCH 2/8] Update test expectations

---
 .../featureWriters/kernFeatureWriter_test.py  | 129 ++++++------------
 1 file changed, 45 insertions(+), 84 deletions(-)

diff --git a/tests/featureWriters/kernFeatureWriter_test.py b/tests/featureWriters/kernFeatureWriter_test.py
index 9653c3bb..84624a31 100644
--- a/tests/featureWriters/kernFeatureWriter_test.py
+++ b/tests/featureWriters/kernFeatureWriter_test.py
@@ -595,29 +595,23 @@ def test_arabic_numerals(self, FontClass):
 
         assert dedent(str(generated)) == dedent(
             """\
-            lookup kern_Arab {
-                lookupflag IgnoreMarks;
-                pos four-ar seven-ar -30;
-            } kern_Arab;
-
-            lookup kern_Thaa {
+            lookup kern_Arab_Thaa {
                 lookupflag IgnoreMarks;
                 pos four-ar seven-ar -30;
-            } kern_Thaa;
+            } kern_Arab_Thaa;
 
             feature kern {
                 script DFLT;
                 language dflt;
-                lookup kern_Arab;
-                lookup kern_Thaa;
+                lookup kern_Arab_Thaa;
 
                 script arab;
                 language dflt;
-                lookup kern_Arab;
+                lookup kern_Arab_Thaa;
 
                 script thaa;
                 language dflt;
-                lookup kern_Thaa;
+                lookup kern_Arab_Thaa;
             } kern;
             """
         )
@@ -1496,74 +1490,55 @@ def test_kern_split_and_drop(FontClass, caplog):
 
     assert dedent(str(newFeatures)) == dedent(
         """\
-        @kern1.Grek.bar = [period];
-        @kern1.Grek.foo = [alpha];
-        @kern1.Latn.foo = [a];
-        @kern1.Orya.foo = [a-orya];
-        @kern2.Grek.bar = [period];
-        @kern2.Grek.foo = [alpha];
-        @kern2.Latn.foo = [a];
-        @kern2.Orya.foo = [a-orya];
-
-        lookup kern_Grek {
-            lookupflag IgnoreMarks;
-            pos @kern1.Grek.foo @kern2.Grek.bar 20;
-            pos @kern1.Grek.bar @kern2.Grek.foo 20;
-        } kern_Grek;
-
-        lookup kern_Latn {
+        @kern1.Cyrl_Grek_Latn_Orya.bar = [a-cy];
+        @kern1.Cyrl_Grek_Latn_Orya.bar_1 = [period];
+        @kern1.Cyrl_Grek_Latn_Orya.foo = [a a-orya alpha];
+        @kern2.Cyrl_Grek_Latn_Orya.bar = [a-cy];
+        @kern2.Cyrl_Grek_Latn_Orya.bar_1 = [period];
+        @kern2.Cyrl_Grek_Latn_Orya.foo = [a a-orya alpha];
+
+        lookup kern_Cyrl_Grek_Latn_Orya {
             lookupflag IgnoreMarks;
-            pos @kern1.Latn.foo @kern2.Grek.bar 20;
-            pos @kern1.Grek.bar @kern2.Latn.foo 20;
-        } kern_Latn;
-
-        lookup kern_Orya {
-            lookupflag IgnoreMarks;
-            pos @kern1.Orya.foo @kern2.Grek.bar 20;
-            pos @kern1.Grek.bar @kern2.Orya.foo 20;
-        } kern_Orya;
+            pos @kern1.Cyrl_Grek_Latn_Orya.foo @kern2.Cyrl_Grek_Latn_Orya.bar 20;
+            pos @kern1.Cyrl_Grek_Latn_Orya.foo @kern2.Cyrl_Grek_Latn_Orya.bar_1 20;
+            pos @kern1.Cyrl_Grek_Latn_Orya.bar @kern2.Cyrl_Grek_Latn_Orya.foo 20;
+            pos @kern1.Cyrl_Grek_Latn_Orya.bar_1 @kern2.Cyrl_Grek_Latn_Orya.foo 20;
+        } kern_Cyrl_Grek_Latn_Orya;
 
         feature kern {
             script DFLT;
             language dflt;
-            lookup kern_Grek;
-            lookup kern_Latn;
+            lookup kern_Cyrl_Grek_Latn_Orya;
+
+            script cyrl;
+            language dflt;
+            lookup kern_Cyrl_Grek_Latn_Orya;
 
             script grek;
             language dflt;
-            lookup kern_Grek;
+            lookup kern_Cyrl_Grek_Latn_Orya;
 
             script latn;
             language dflt;
-            lookup kern_Latn;
+            lookup kern_Cyrl_Grek_Latn_Orya;
         } kern;
 
         feature dist {
             script ory2;
             language dflt;
-            lookup kern_Orya;
+            lookup kern_Cyrl_Grek_Latn_Orya;
 
             script orya;
             language dflt;
-            lookup kern_Orya;
+            lookup kern_Cyrl_Grek_Latn_Orya;
         } dist;
         """
     )
 
-    msgs = sorted(msg[-30:] for msg in caplog.messages)
+    msgs = sorted(msg[-31:] for msg in caplog.messages)
     assert msgs == [
-        "with mixed script (Arab, Grek)",
-        "with mixed script (Arab, Latn)",
-        "with mixed script (Arab, Orya)",
-        "with mixed script (Cyrl, Grek)",
-        "with mixed script (Cyrl, Latn)",
-        "with mixed script (Cyrl, Orya)",
-        "with mixed script (Grek, Arab)",
-        "with mixed script (Grek, Cyrl)",
-        "with mixed script (Latn, Arab)",
-        "with mixed script (Latn, Cyrl)",
-        "with mixed script (Orya, Arab)",
-        "with mixed script (Orya, Cyrl)",
+        "with mixed direction (LTR, RTL)",
+        "with mixed direction (RTL, LTR)",
     ]
 
 
@@ -1598,7 +1573,7 @@ def test_kern_split_and_drop_mixed(caplog, FontClass):
         """
     )
     assert (
-        "Skipping kerning pair <('V', 'W') ('W', 'gba-nko') -20> with mixed script (Latn, Nkoo)"
+        "Skipping kerning pair <('V', 'W') ('W', 'gba-nko') -20> with mixed direction (LTR, RTL)"
         in caplog.text
     )
 
@@ -1681,33 +1656,26 @@ def test_kern_multi_script(FontClass):
 
     assert dedent(str(newFeatures)) == dedent(
         """\
-        @kern1.Arab.foo = [lam-ar];
-        @kern1.Nkoo.foo = [gba-nko];
-        @kern2.Arab.foo = [comma-ar];
-
-        lookup kern_Arab {
-            lookupflag IgnoreMarks;
-            pos @kern1.Arab.foo @kern2.Arab.foo <-20 0 -20 0>;
-        } kern_Arab;
+        @kern1.Arab_Nkoo.foo = [gba-nko lam-ar];
+        @kern2.Arab_Nkoo.foo = [comma-ar];
 
-        lookup kern_Nkoo {
+        lookup kern_Arab_Nkoo {
             lookupflag IgnoreMarks;
-            pos @kern1.Nkoo.foo @kern2.Arab.foo <-20 0 -20 0>;
-        } kern_Nkoo;
+            pos @kern1.Arab_Nkoo.foo @kern2.Arab_Nkoo.foo <-20 0 -20 0>;
+        } kern_Arab_Nkoo;
 
         feature kern {
             script DFLT;
             language dflt;
-            lookup kern_Arab;
-            lookup kern_Nkoo;
+            lookup kern_Arab_Nkoo;
 
             script arab;
             language dflt;
-            lookup kern_Arab;
+            lookup kern_Arab_Nkoo;
 
             script nko;
             language dflt;
-            lookup kern_Nkoo;
+            lookup kern_Arab_Nkoo;
         } kern;
         """
     )
@@ -1837,13 +1805,14 @@ def test_kern_zyyy_zinh(FontClass):
             pos uni1DC0 uni1DC0 6;
         } kern_Grek;
 
-        lookup kern_Hani {
+        lookup kern_Hani_Hrkt {
             lookupflag IgnoreMarks;
             pos uni1D360 uni1D360 37;
             pos uni1D370 uni1D370 38;
             pos uni1F250 uni1F250 39;
             pos uni3010 uni3010 8;
             pos uni3030 uni3030 9;
+            pos uni30A0 uni30A0 10;
             pos uni3190 uni3190 11;
             pos uni31C0 uni31C0 12;
             pos uni31D0 uni31D0 13;
@@ -1861,15 +1830,8 @@ def test_kern_zyyy_zinh(FontClass):
             pos uni33E0 uni33E0 25;
             pos uni33F0 uni33F0 26;
             pos uniA700 uniA700 27;
-        } kern_Hani;
-
-        lookup kern_Hrkt {
-            lookupflag IgnoreMarks;
-            pos uni3010 uni3010 8;
-            pos uni3030 uni3030 9;
-            pos uni30A0 uni30A0 10;
             pos uniFF70 uniFF70 29;
-        } kern_Hrkt;
+        } kern_Hani_Hrkt;
 
         lookup kern_Default {
             lookupflag IgnoreMarks;
@@ -1889,8 +1851,7 @@ def test_kern_zyyy_zinh(FontClass):
             language dflt;
             lookup kern_Default;
             lookup kern_Grek;
-            lookup kern_Hani;
-            lookup kern_Hrkt;
+            lookup kern_Hani_Hrkt;
 
             script grek;
             language dflt;
@@ -1900,12 +1861,12 @@ def test_kern_zyyy_zinh(FontClass):
             script hani;
             language dflt;
             lookup kern_Default;
-            lookup kern_Hani;
+            lookup kern_Hani_Hrkt;
 
             script kana;
             language dflt;
             lookup kern_Default;
-            lookup kern_Hrkt;
+            lookup kern_Hani_Hrkt;
         } kern;
 
         feature dist {

From eba8fab15a074782802d3228c19525c19266699c Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Sun, 21 Jan 2024 19:41:01 +0200
Subject: [PATCH 3/8] Re-use helper function

---
 .../featureWriters/kernFeatureWriter.py       | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
index d5def86f..9d5eea07 100644
--- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
@@ -60,6 +60,12 @@ def unicodeBidiType(uv):
         return None
 
 
+def script_direction(script: str) -> str:
+    if script == COMMON_SCRIPT:
+        return "Auto"
+    return script_horizontal_direction(script, "LTR")
+
+
 @dataclass(frozen=True, order=False)
 class KerningPair:
     __slots__ = ("side1", "side2", "value")
@@ -723,9 +729,7 @@ def _makeSplitScriptKernLookups(self, lookups, pairs, ignoreMarks=True, suffix="
                         pair.value,
                     )
                     continue
-                directions = {
-                    script_horizontal_direction(script, "LTR") for script in scripts
-                }
+                directions = {script_direction(script) for script in scripts}
                 assert len(directions) == 1
                 scriptIsRtl = directions == {"RTL"}
                 # Numbers are always shaped LTR even in RTL scripts:
@@ -785,10 +789,10 @@ def _registerLookups(
             lookupsLTR: list[ast.LookupBlock] = []
             lookupsRTL: list[ast.LookupBlock] = []
             for script, scriptLookups in sorted(lookups.items()):
-                if script != COMMON_SCRIPT and script not in DIST_ENABLED_SCRIPTS:
-                    if script_horizontal_direction(script, "LTR") == "LTR":
+                if script not in DIST_ENABLED_SCRIPTS:
+                    if script_direction(script) == "LTR":
                         lookupsLTR.extend(scriptLookups.values())
-                    elif script_horizontal_direction(script, "LTR") == "RTL":
+                    elif script_direction(script) == "RTL":
                         lookupsRTL.extend(scriptLookups.values())
             dfltLookups.extend(
                 lkp for lkp in (lookupsLTR or lookupsRTL) if lkp not in dfltLookups
@@ -860,11 +864,6 @@ def partitionByScript(
     """Split a potentially mixed-script pair into pairs that make sense based
     on the dominant script, and yield each combination with its dominant script."""
 
-    def script_direction(script: str) -> str:
-        if script == COMMON_SCRIPT:
-            return "Auto"
-        return script_horizontal_direction(script, "LTR")
-
     side1Directions: dict[str, set[str]] = {}
     side2Directions: dict[str, set[str]] = {}
     resolvedScripts: dict[str, set[str]] = {}

From d5db468ac96abb8d3cf90dbaf57ca63021c7b327 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D8=AE=D8=A7=D9=84=D8=AF=20=D8=AD=D8=B3=D9=86=D9=8A=20=28K?=
 =?UTF-8?q?haled=20Hosny=29?= <khaled@aliftype.com>
Date: Mon, 22 Jan 2024 14:26:01 +0200
Subject: [PATCH 4/8] Update Lib/ufo2ft/featureWriters/kernFeatureWriter.py

Co-authored-by: Cosimo Lupo <clupo@google.com>
---
 Lib/ufo2ft/featureWriters/kernFeatureWriter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
index 9d5eea07..483ca716 100644
--- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
@@ -164,8 +164,8 @@ class KernFeatureWriter(BaseFeatureWriter):
     * Go through all kerning pairs and split them up by script, to put them in
       different lookups. This reduces the size of each lookup compared to
       splitting by direction, as previously done. If there are kerning pairs
-      with different scripts on each side, theese scripts are all kept together
-      to allow for cross-script kerning (in implmenetations that apply it).
+      with different scripts on each side, these scripts are all kept together
+      to allow for cross-script kerning (in implementations that apply it).
       Scripts with different direction are always split.
         * Partition the first and second side of a pair by script and emit only
           those with the same script (e.g. `a` and `b` are both "Latn", `period`

From 7cfa044c6ac12b86e415dc88dd13bbe8cc419d77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D8=AE=D8=A7=D9=84=D8=AF=20=D8=AD=D8=B3=D9=86=D9=8A=20=28K?=
 =?UTF-8?q?haled=20Hosny=29?= <khaled@aliftype.com>
Date: Mon, 22 Jan 2024 14:26:21 +0200
Subject: [PATCH 5/8] Update Lib/ufo2ft/featureWriters/kernFeatureWriter.py

Co-authored-by: Cosimo Lupo <clupo@google.com>
---
 Lib/ufo2ft/featureWriters/kernFeatureWriter.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
index 483ca716..0c0eb07f 100644
--- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
@@ -878,9 +878,7 @@ def partitionByScript(
         if scripts & DFLT_SCRIPTS:
             scripts = COMMON_SCRIPTS_SET
         resolvedScripts[glyph] = scripts
-        scripts = sorted(scripts)
-        directions = [script_direction(script) for script in scripts]
-        for direction in directions:
+        for direction in (script_direction(script) for script in sorted(scripts)):
             side1Directions.setdefault(direction, set()).add(glyph)
     for glyph in pair.secondGlyphs:
         scripts = glyphScripts.get(glyph, DFLT_SCRIPTS)

From 98cf47aca13378ce3084be7de5a0197244225d4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D8=AE=D8=A7=D9=84=D8=AF=20=D8=AD=D8=B3=D9=86=D9=8A=20=28K?=
 =?UTF-8?q?haled=20Hosny=29?= <khaled@aliftype.com>
Date: Mon, 22 Jan 2024 14:26:42 +0200
Subject: [PATCH 6/8] Update Lib/ufo2ft/featureWriters/kernFeatureWriter.py

Co-authored-by: Cosimo Lupo <clupo@google.com>
---
 Lib/ufo2ft/featureWriters/kernFeatureWriter.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
index 0c0eb07f..7f81ccfb 100644
--- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
@@ -885,9 +885,7 @@ def partitionByScript(
         if scripts & DFLT_SCRIPTS:
             scripts = COMMON_SCRIPTS_SET
         resolvedScripts[glyph] = scripts
-        scripts = sorted(scripts)
-        directions = [script_direction(script) for script in scripts]
-        for direction in directions:
+        for direction in (script_direction(script) for script in sorted(scripts)):
             side2Directions.setdefault(direction, set()).add(glyph)
 
     for side1Direction, side2Direction in itertools.product(

From 60910725bc0c52dbf7486169dd5283b9ebb4b122 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Mon, 22 Jan 2024 17:11:58 +0200
Subject: [PATCH 7/8] Add a comment and assertion to make intent explicit

---
 Lib/ufo2ft/featureWriters/kernFeatureWriter.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
index 7f81ccfb..a17ca9b4 100644
--- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
@@ -959,12 +959,17 @@ def mergeScripts(kerningPerScript):
             result.append(common)
         sets = result
 
+    # Now that we have merged all common-script buckets, we need to re-assign
+    # the kerning pairs to the new buckets.
     result = {tuple(sorted(scripts)): [] for scripts in sets}
     for scripts, pairs in kerningPerScript.items():
         for scripts2 in sets:
             if scripts2 & set(scripts):
                 result[tuple(sorted(scripts2))].extend(pairs)
                 break
+        else:
+            # Shouldn't happen, but just in case.
+            raise AssertionError
     return result
 
 

From 327e760120e01c7c6d415ae0a438036fc364d99d Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Mon, 22 Jan 2024 19:17:26 +0200
Subject: [PATCH 8/8] Log merging of kerning lookups

---
 Lib/ufo2ft/featureWriters/kernFeatureWriter.py | 7 ++++++-
 tests/featureWriters/kernFeatureWriter_test.py | 8 ++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
index a17ca9b4..524c4ddd 100644
--- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py
@@ -851,7 +851,12 @@ def splitKerning(pairs, glyphScripts):
 
     kerningPerScript = mergeScripts(kerningPerScript)
 
-    for pairs in kerningPerScript.values():
+    for scripts, pairs in kerningPerScript.items():
+        if len(scripts) > 1:
+            LOGGER.info(
+                "Merging kerning lookups from the following scripts: %s",
+                ", ".join(scripts),
+            )
         pairs.sort()
 
     return kerningPerScript
diff --git a/tests/featureWriters/kernFeatureWriter_test.py b/tests/featureWriters/kernFeatureWriter_test.py
index 84624a31..a4caa618 100644
--- a/tests/featureWriters/kernFeatureWriter_test.py
+++ b/tests/featureWriters/kernFeatureWriter_test.py
@@ -1535,10 +1535,10 @@ def test_kern_split_and_drop(FontClass, caplog):
         """
     )
 
-    msgs = sorted(msg[-31:] for msg in caplog.messages)
-    assert msgs == [
-        "with mixed direction (LTR, RTL)",
-        "with mixed direction (RTL, LTR)",
+    assert caplog.messages == [
+        "Skipping kerning pair <('a', 'a-orya', 'alpha') ('a-cy', 'alef-ar', 'period') 20> with mixed direction (LTR, RTL)",
+        "Skipping kerning pair <('a-cy', 'alef-ar', 'period') ('a', 'a-orya', 'alpha') 20> with mixed direction (RTL, LTR)",
+        "Merging kerning lookups from the following scripts: Cyrl, Grek, Latn, Orya",
     ]