Improve mark writer for character used in multiple scripts (#678)

* Improve mark writer for character used in multiple scripts If a character is used in multiple scripts, and on of these scripts uses e.g. USE shaper, the mark writer will write its anchors in abvm/blwm features exclusively. This breaks the mark attachment if the shaper of the indented script does not enable abvm/blwm features. This is a partial fix, by looking into the language systems declared in the feature file and filtering out abvm scripts not in them. Part of #579 * Write mark and abvm/blwm features for glyphs that need both Some glyphs (e.g. kashida-ar) can be used in scripts that require mark feature and scripts that require abvm/blwm features, but we were writing its mark attachment only in the later. Now we write it in both. Fixes #579 * Minor
googlefonts · Nov 25, 2022 · 1c33aac · 1c33aac
1 parent 9b50177
commit 1c33aac
Show file tree

Hide file tree

Showing 2 changed files with 136 additions and 15 deletions.
diff --git a/Lib/ufo2ft/featureWriters/markFeatureWriter.py b/Lib/ufo2ft/featureWriters/markFeatureWriter.py
@@ -4,6 +4,7 @@
 from functools import partial
 
 from fontTools.misc.fixedTools import otRound
+from fontTools.unicodedata import script_extension
 
 from ufo2ft.constants import INDIC_SCRIPTS, USE_SCRIPTS
 from ufo2ft.featureWriters import BaseFeatureWriter, ast
@@ -300,6 +301,7 @@ def setContext(self, font, feaFile, compiler=None):
         ctx.gdefClasses = self.getGDEFGlyphClasses()
         ctx.anchorLists = self._getAnchorLists()
         ctx.anchorPairs = self._getAnchorPairs()
+        ctx.feaScripts = set(ast.getScriptLanguageSystems(feaFile).keys())
 
     def shouldContinue(self):
         if not self.context.anchorPairs:
@@ -822,13 +824,13 @@ def _makeFeatures(self):
         )
         ctx.markToMarkAttachments = self._makeMarkToMarkAttachments()
 
-        abvmGlyphs = self._getAbvmGlyphs()
+        abvmGlyphs, notAbvmGlyphs = self._getAbvmGlyphs()
 
         def isAbvm(glyphName):
             return glyphName in abvmGlyphs
 
         def isNotAbvm(glyphName):
-            return glyphName not in abvmGlyphs
+            return glyphName in notAbvmGlyphs
 
         features = {}
         todo = ctx.todo
@@ -852,19 +854,43 @@ def isNotAbvm(glyphName):
         return features
 
     def _getAbvmGlyphs(self):
-        cmap = self.makeUnicodeToGlyphNameMapping()
-        unicodeIsAbvm = partial(unicodeInScripts, scripts=self.scriptsUsingAbvm)
-        if any(unicodeIsAbvm(uv) for uv in cmap):
-            # If there are any characters from Indic/USE/Khmer scripts in the cmap, we
-            # compile a temporary GSUB table to resolve substitutions and get
-            # the set of all the relevant glyphs, including alternate glyphs.
-            gsub = self.compileGSUB()
-            glyphGroups = classifyGlyphs(unicodeIsAbvm, cmap, gsub)
-            # the 'glyphGroups' dict is keyed by the return value of the
-            # classifying include, so here 'True' means all the Indic/USE/Khmer glyphs
-            return glyphGroups.get(True, set())
-        else:
-            return set()
+        glyphSet = set(self.getOrderedGlyphSet().keys())
+        scriptsUsingAbvm = self.scriptsUsingAbvm
+        if self.context.feaScripts:
+            # https://github.com/googlefonts/ufo2ft/issues/579 Some characters
+            # can be used in multiple scripts and some of these scripts might
+            # need an abvm feature and some might not, so we filter-out the
+            # abvm scripts that the font does not intend to support.
+            scriptsUsingAbvm = scriptsUsingAbvm & self.context.feaScripts
+        if scriptsUsingAbvm:
+            cmap = self.makeUnicodeToGlyphNameMapping()
+            unicodeIsAbvm = partial(unicodeInScripts, scripts=scriptsUsingAbvm)
+
+            def unicodeIsNotAbvm(uv):
+                return bool(script_extension(chr(uv)) - self.scriptsUsingAbvm)
+
+            if any(unicodeIsAbvm(uv) for uv in cmap):
+                # If there are any characters from Indic/USE/Khmer scripts in
+                # the cmap, we compile a temporary GSUB table to resolve
+                # substitutions and get the set of all the relevant glyphs,
+                # including alternate glyphs.
+                gsub = self.compileGSUB()
+                glyphGroups = classifyGlyphs(unicodeIsAbvm, cmap, gsub)
+                # the 'glyphGroups' dict is keyed by the return value of the
+                # classifying include, so here 'True' means all the
+                # Indic/USE/Khmer glyphs
+                abvmGlyphs = glyphGroups.get(True, set())
+
+                # If a character can be used in Indic/USE/Khmer scripts as well
+                # as other scripts, we want to return it in both 'abvmGlyphs'
+                # (done above) and 'notAbvmGlyphs' (done below) sets.
+                glyphGroups = classifyGlyphs(unicodeIsNotAbvm, cmap, gsub)
+                notAbvmGlyphs = glyphGroups.get(True, set())
+                # Since cmap might not cover all glyphs, we union with the
+                # glyph set.
+                notAbvmGlyphs |= glyphSet - abvmGlyphs
+                return abvmGlyphs, notAbvmGlyphs
+        return set(), glyphSet
 
     def _write(self):
         self._pruneUnusedAnchors()

diff --git a/tests/featureWriters/markFeatureWriter_test.py b/tests/featureWriters/markFeatureWriter_test.py
@@ -824,6 +824,95 @@ def test_abvm_blwm_features(self, FontClass):
             """  # noqa: B950
         )
 
+    def test_shared_script_char(self, FontClass):
+        ufo = FontClass()
+        ufo.info.unitsPerEm = 1000
+
+        dottedCircle = ufo.newGlyph("kashida-ar")
+        dottedCircle.unicode = 0x0640
+        dottedCircle.anchors = [
+            {"name": "top", "x": 100, "y": 100},
+            {"name": "bottom", "x": 100, "y": -100},
+        ]
+
+        nukta = ufo.newGlyph("fatha-ar")
+        nukta.unicode = 0x064E
+        nukta.appendAnchor({"name": "_top", "x": 0, "y": 0})
+
+        nukta = ufo.newGlyph("kasra-ar")
+        nukta.unicode = 0x0650
+        nukta.appendAnchor({"name": "_bottom", "x": 0, "y": 547})
+
+        ufo.features.text = dedent(
+            """\
+            languagesystem DFLT dflt;
+            languagesystem arab dflt;
+            """
+        )
+        generated = self.writeFeatures(ufo)
+
+        assert str(generated) == dedent(
+            """\
+            markClass kasra-ar <anchor 0 547> @MC_bottom;
+            markClass fatha-ar <anchor 0 0> @MC_top;
+
+            feature mark {
+                lookup mark2base {
+                    pos base kashida-ar
+                        <anchor 100 -100> mark @MC_bottom
+                        <anchor 100 100> mark @MC_top;
+                } mark2base;
+
+            } mark;
+            """  # noqa: B950
+        )
+
+        expected = dedent(
+            """\
+            markClass kasra-ar <anchor 0 547> @MC_bottom;
+            markClass fatha-ar <anchor 0 0> @MC_top;
+
+            feature abvm {
+                lookup abvm_mark2base {
+                    pos base kashida-ar
+                        <anchor 100 100> mark @MC_top;
+                } abvm_mark2base;
+
+            } abvm;
+
+            feature blwm {
+                lookup blwm_mark2base {
+                    pos base kashida-ar
+                        <anchor 100 -100> mark @MC_bottom;
+                } blwm_mark2base;
+
+            } blwm;
+
+            feature mark {
+                lookup mark2base {
+                    pos base kashida-ar
+                        <anchor 100 -100> mark @MC_bottom
+                        <anchor 100 100> mark @MC_top;
+                } mark2base;
+
+            } mark;
+            """  # noqa: B950
+        )
+
+        ufo.features.text = ""
+        generated = self.writeFeatures(ufo)
+        assert str(generated) == expected
+
+        ufo.features.text = dedent(
+            """\
+            languagesystem DFLT dflt;
+            languagesystem arab dflt;
+            languagesystem adlm dflt;
+            """
+        )
+        generated = self.writeFeatures(ufo)
+        assert str(generated) == expected
+
     def test_all_features(self, testufo):
         ufo = testufo
         ufo.info.unitsPerEm = 1000
@@ -889,6 +978,12 @@ def test_all_features(self, testufo):
             "foo_bar_baz",
             "bar_foo",
         ]
+        ufo.features.text = dedent(
+            """\
+            languagesystem DFLT dflt;
+            languagesystem taml dflt;
+            """
+        )
         generated = self.writeFeatures(testufo)
 
         assert str(generated) == dedent(