Skip to content

Commit

Permalink
Improve mark writer for character used in multiple scripts (#678)
Browse files Browse the repository at this point in the history
* Improve mark writer for character used in multiple scripts

If a character is used in multiple scripts, and on of these scripts uses
e.g. USE shaper, the mark writer will write its anchors in abvm/blwm
features exclusively. This breaks the mark attachment if the shaper of
the indented script does not enable abvm/blwm features.

This is a partial fix, by looking into the language systems declared in
the feature file and filtering out abvm scripts not in them.

Part of #579

* Write mark and abvm/blwm features for glyphs that need both

Some glyphs (e.g. kashida-ar) can be used in scripts that require mark
feature and scripts that require abvm/blwm features, but we were writing
its mark attachment only in the later. Now we write it in both.

Fixes #579

* Minor
  • Loading branch information
khaledhosny authored Nov 25, 2022
1 parent 9b50177 commit 1c33aac
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 15 deletions.
56 changes: 41 additions & 15 deletions Lib/ufo2ft/featureWriters/markFeatureWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from functools import partial

from fontTools.misc.fixedTools import otRound
from fontTools.unicodedata import script_extension

from ufo2ft.constants import INDIC_SCRIPTS, USE_SCRIPTS
from ufo2ft.featureWriters import BaseFeatureWriter, ast
Expand Down Expand Up @@ -300,6 +301,7 @@ def setContext(self, font, feaFile, compiler=None):
ctx.gdefClasses = self.getGDEFGlyphClasses()
ctx.anchorLists = self._getAnchorLists()
ctx.anchorPairs = self._getAnchorPairs()
ctx.feaScripts = set(ast.getScriptLanguageSystems(feaFile).keys())

def shouldContinue(self):
if not self.context.anchorPairs:
Expand Down Expand Up @@ -822,13 +824,13 @@ def _makeFeatures(self):
)
ctx.markToMarkAttachments = self._makeMarkToMarkAttachments()

abvmGlyphs = self._getAbvmGlyphs()
abvmGlyphs, notAbvmGlyphs = self._getAbvmGlyphs()

def isAbvm(glyphName):
return glyphName in abvmGlyphs

def isNotAbvm(glyphName):
return glyphName not in abvmGlyphs
return glyphName in notAbvmGlyphs

features = {}
todo = ctx.todo
Expand All @@ -852,19 +854,43 @@ def isNotAbvm(glyphName):
return features

def _getAbvmGlyphs(self):
cmap = self.makeUnicodeToGlyphNameMapping()
unicodeIsAbvm = partial(unicodeInScripts, scripts=self.scriptsUsingAbvm)
if any(unicodeIsAbvm(uv) for uv in cmap):
# If there are any characters from Indic/USE/Khmer scripts in the cmap, we
# compile a temporary GSUB table to resolve substitutions and get
# the set of all the relevant glyphs, including alternate glyphs.
gsub = self.compileGSUB()
glyphGroups = classifyGlyphs(unicodeIsAbvm, cmap, gsub)
# the 'glyphGroups' dict is keyed by the return value of the
# classifying include, so here 'True' means all the Indic/USE/Khmer glyphs
return glyphGroups.get(True, set())
else:
return set()
glyphSet = set(self.getOrderedGlyphSet().keys())
scriptsUsingAbvm = self.scriptsUsingAbvm
if self.context.feaScripts:
# https://github.com/googlefonts/ufo2ft/issues/579 Some characters
# can be used in multiple scripts and some of these scripts might
# need an abvm feature and some might not, so we filter-out the
# abvm scripts that the font does not intend to support.
scriptsUsingAbvm = scriptsUsingAbvm & self.context.feaScripts
if scriptsUsingAbvm:
cmap = self.makeUnicodeToGlyphNameMapping()
unicodeIsAbvm = partial(unicodeInScripts, scripts=scriptsUsingAbvm)

def unicodeIsNotAbvm(uv):
return bool(script_extension(chr(uv)) - self.scriptsUsingAbvm)

if any(unicodeIsAbvm(uv) for uv in cmap):
# If there are any characters from Indic/USE/Khmer scripts in
# the cmap, we compile a temporary GSUB table to resolve
# substitutions and get the set of all the relevant glyphs,
# including alternate glyphs.
gsub = self.compileGSUB()
glyphGroups = classifyGlyphs(unicodeIsAbvm, cmap, gsub)
# the 'glyphGroups' dict is keyed by the return value of the
# classifying include, so here 'True' means all the
# Indic/USE/Khmer glyphs
abvmGlyphs = glyphGroups.get(True, set())

# If a character can be used in Indic/USE/Khmer scripts as well
# as other scripts, we want to return it in both 'abvmGlyphs'
# (done above) and 'notAbvmGlyphs' (done below) sets.
glyphGroups = classifyGlyphs(unicodeIsNotAbvm, cmap, gsub)
notAbvmGlyphs = glyphGroups.get(True, set())
# Since cmap might not cover all glyphs, we union with the
# glyph set.
notAbvmGlyphs |= glyphSet - abvmGlyphs
return abvmGlyphs, notAbvmGlyphs
return set(), glyphSet

def _write(self):
self._pruneUnusedAnchors()
Expand Down
95 changes: 95 additions & 0 deletions tests/featureWriters/markFeatureWriter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,95 @@ def test_abvm_blwm_features(self, FontClass):
""" # noqa: B950
)

def test_shared_script_char(self, FontClass):
ufo = FontClass()
ufo.info.unitsPerEm = 1000

dottedCircle = ufo.newGlyph("kashida-ar")
dottedCircle.unicode = 0x0640
dottedCircle.anchors = [
{"name": "top", "x": 100, "y": 100},
{"name": "bottom", "x": 100, "y": -100},
]

nukta = ufo.newGlyph("fatha-ar")
nukta.unicode = 0x064E
nukta.appendAnchor({"name": "_top", "x": 0, "y": 0})

nukta = ufo.newGlyph("kasra-ar")
nukta.unicode = 0x0650
nukta.appendAnchor({"name": "_bottom", "x": 0, "y": 547})

ufo.features.text = dedent(
"""\
languagesystem DFLT dflt;
languagesystem arab dflt;
"""
)
generated = self.writeFeatures(ufo)

assert str(generated) == dedent(
"""\
markClass kasra-ar <anchor 0 547> @MC_bottom;
markClass fatha-ar <anchor 0 0> @MC_top;
feature mark {
lookup mark2base {
pos base kashida-ar
<anchor 100 -100> mark @MC_bottom
<anchor 100 100> mark @MC_top;
} mark2base;
} mark;
""" # noqa: B950
)

expected = dedent(
"""\
markClass kasra-ar <anchor 0 547> @MC_bottom;
markClass fatha-ar <anchor 0 0> @MC_top;
feature abvm {
lookup abvm_mark2base {
pos base kashida-ar
<anchor 100 100> mark @MC_top;
} abvm_mark2base;
} abvm;
feature blwm {
lookup blwm_mark2base {
pos base kashida-ar
<anchor 100 -100> mark @MC_bottom;
} blwm_mark2base;
} blwm;
feature mark {
lookup mark2base {
pos base kashida-ar
<anchor 100 -100> mark @MC_bottom
<anchor 100 100> mark @MC_top;
} mark2base;
} mark;
""" # noqa: B950
)

ufo.features.text = ""
generated = self.writeFeatures(ufo)
assert str(generated) == expected

ufo.features.text = dedent(
"""\
languagesystem DFLT dflt;
languagesystem arab dflt;
languagesystem adlm dflt;
"""
)
generated = self.writeFeatures(ufo)
assert str(generated) == expected

def test_all_features(self, testufo):
ufo = testufo
ufo.info.unitsPerEm = 1000
Expand Down Expand Up @@ -889,6 +978,12 @@ def test_all_features(self, testufo):
"foo_bar_baz",
"bar_foo",
]
ufo.features.text = dedent(
"""\
languagesystem DFLT dflt;
languagesystem taml dflt;
"""
)
generated = self.writeFeatures(testufo)

assert str(generated) == dedent(
Expand Down

0 comments on commit 1c33aac

Please sign in to comment.