From 573565e2a071f4296056b6b343e4456234ae39f2 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Thu, 5 Sep 2024 21:15:00 +0300
Subject: [PATCH 1/2] markFeatureWriter: Support contextual ligature anchors

The current code was assuming all contextual anchors are mark-to-base
and was producing wrong output for ligature anchors. With this change,
contextual ligature anchors are properly supported.
---
 .../featureWriters/markFeatureWriter.py       | 199 +++++++++++-------
 .../featureWriters/markFeatureWriter_test.py  | 126 ++++++++++-
 2 files changed, 241 insertions(+), 84 deletions(-)

diff --git a/Lib/ufo2ft/featureWriters/markFeatureWriter.py b/Lib/ufo2ft/featureWriters/markFeatureWriter.py
index 1acf65d1..6816beb4 100644
--- a/Lib/ufo2ft/featureWriters/markFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/markFeatureWriter.py
@@ -725,6 +725,29 @@ def _makeMarkToLigaAttachments(self):
             result.append(MarkToLigaPos(glyphName, ligatureMarks))
         return result
 
+    def _makeContextualAttachments(self, glyphClass, liga=False):
+        ctx = self.context
+        result = defaultdict(list)
+        markGlyphNames = ctx.markGlyphNames
+        for glyphName, anchors in sorted(ctx.anchorLists.items()):
+            if glyphName in markGlyphNames:
+                continue
+            if glyphClass and glyphName not in glyphClass:
+                continue
+            for anchor in anchors:
+                # Skip non-contextual anchors
+                if not anchor.isContextual:
+                    continue
+                # If we are building the mark2liga lookup, skip anchors without a number
+                if liga and anchor.number is None:
+                    continue
+                # If we are building the mark2base lookup, skip anchors with a number
+                if not liga and anchor.number is not None:
+                    continue
+                anchor_context = anchor.libData["GPOS_Context"].strip()
+                result[anchor_context].append((glyphName, anchor))
+        return result
+
     @staticmethod
     def _iterAttachments(attachments, include=None, marksFilter=None):
         for pos in attachments:
@@ -778,6 +801,7 @@ def _makeMarkToMarkLookup(
         return lkp
 
     def _makeMarkFeature(self, include):
+        # First make the non-contextual lookups
         baseLkps = []
         for attachments in self.context.groupedMarkToBaseAttachments:
             i = len(baseLkps)
@@ -794,100 +818,113 @@ def _makeMarkFeature(self, include):
             )
             if lookup:
                 ligaLkps.append(lookup)
-        if not baseLkps and not ligaLkps:
-            return
 
-        feature = ast.FeatureBlock("mark")
-        for baseLkp in baseLkps:
-            feature.statements.append(baseLkp)
-        for ligaLkp in ligaLkps:
-            feature.statements.append(ligaLkp)
-        return feature
-
-    def _makeContextualMarkFeature(self, feature):
-        ctx = self.context
-
-        # Arrange by context
-        by_context = defaultdict(list)
-        markGlyphNames = ctx.markGlyphNames
-
-        for glyphName, anchors in sorted(ctx.anchorLists.items()):
-            if glyphName in markGlyphNames:
-                continue
-            for anchor in anchors:
-                if not anchor.isContextual:
-                    continue
-                anchor_context = anchor.libData["GPOS_Context"].strip()
-                by_context[anchor_context].append((glyphName, anchor))
-        if not by_context:
-            return feature, []
-
-        if feature is None:
-            feature = ast.FeatureBlock("mark")
-
-        # Pull the lookups from the feature and replace them with lookup references,
-        # to ensure the order is correct
-        lookups = feature.statements
-        feature.statements = [ast.LookupReferenceStatement(lu) for lu in lookups]
-        dispatch_lookups = {}
+        # Then make the contextual ones
+        refLkps = []
+        ctxLkps = {}
         # We sort the full context by longest first. This isn't perfect
         # but it gives us the best chance that more specific contexts
         # (typically longer) will take precedence over more general ones.
-        for ix, (fullcontext, glyph_anchor_pair) in enumerate(
-            sorted(by_context.items(), key=lambda x: -len(x[0]))
+        for context, glyph_anchor_pair in sorted(
+            self.context.contextualMarkToBaseAnchors.items(), key=lambda x: -len(x[0])
+        ):
+            # Group by anchor
+            attachments = defaultdict(list)
+            for glyphName, anchor in glyph_anchor_pair:
+                attachments[anchor.key].append(MarkToBasePos(glyphName, [anchor]))
+            self._makeContextualMarkLookup(
+                attachments,
+                context,
+                refLkps,
+                ctxLkps,
+            )
+
+        for context, glyph_anchor_pair in sorted(
+            self.context.contextualMarkToLigaAnchors.items(), key=lambda x: -len(x[0])
         ):
-            # Make the contextual lookup
-            lookupname = "ContextualMark_%i" % ix
+            # Group by anchor
+            attachments = defaultdict(list)
+            for glyphName, anchor in glyph_anchor_pair:
+                marks = [[]] * max(
+                    a.number
+                    for a in self.context.anchorLists[glyphName]
+                    if a.key and a.number is not None
+                )
+                marks[anchor.number - 1] = [anchor]
+                attachments[anchor.key].append(MarkToLigaPos(glyphName, marks))
+            self._makeContextualMarkLookup(
+                attachments,
+                context,
+                refLkps,
+                ctxLkps,
+            )
+
+        ctxLkps = list(ctxLkps.values())
+        if not baseLkps and not ligaLkps and not ctxLkps:
+            return None, []
+
+        feature = ast.FeatureBlock("mark")
+        if ctxLkps:
+            # When we have contextual lookups, we need to make sure that the
+            # contextual and non-contextual lookups are in the right order
+            # and we can’t use nested lookups inside the feature block for
+            # the referenced lookups, so we put all lookups outside the feature
+            # and use lookup references instead.
+            # We should probably always do this, as nested lookups are full of
+            # gotchas, but this will require updating many test expectations.
+            lookups = baseLkps + ligaLkps + refLkps + ctxLkps
+            for lookup in baseLkps + ligaLkps + ctxLkps:
+                feature.statements.append(ast.LookupReferenceStatement(lookup))
+        else:
+            lookups = []
+            for lookup in baseLkps + ligaLkps:
+                feature.statements.append(lookup)
+        return feature, lookups
+
+    def _makeContextualMarkLookup(
+        self,
+        attachments,
+        fullcontext,
+        refLkps,
+        ctxLkps,
+    ):
+        for anchorKey, statements in attachments.items():
+            # First make the contextual lookup
             if ";" in fullcontext:
                 before, after = fullcontext.split(";")
-                # I know it's not really a comment but this is the easiest way
-                # to get the lookup flag in there without reparsing it.
             else:
-                after = fullcontext
-                before = ""
+                before, after = "", fullcontext
             after = after.strip()
-            if before not in dispatch_lookups:
-                dispatch_lookups[before] = ast.LookupBlock(
-                    "ContextualMarkDispatch_%i" % len(dispatch_lookups.keys())
+            if before not in ctxLkps:
+                ctxLkps[before] = ast.LookupBlock(
+                    f"ContextualMarkDispatch_{len(ctxLkps)}"
                 )
                 if before:
-                    dispatch_lookups[before].statements.append(
-                        ast.Comment(f"{before};")
-                    )
-                feature.statements.append(
-                    ast.LookupReferenceStatement(dispatch_lookups[before])
-                )
-            lkp = dispatch_lookups[before]
-            lkp.statements.append(ast.Comment(f"# {after}"))
-            lookup = ast.LookupBlock(lookupname)
-            for glyph, anchor in glyph_anchor_pair:
-                lookup.statements.append(MarkToBasePos(glyph, [anchor]).asAST())
-            lookups.append(lookup)
+                    # I know it's not really a comment but this is the easiest way
+                    # to get the lookup flag in there without reparsing it.
+                    ctxLkps[before].statements.append(ast.Comment(f"{before};"))
+            ctxLkp = ctxLkps[before]
+            ctxLkp.statements.append(ast.Comment(f"# {after}"))
 
             # Insert mark glyph names after base glyph names if not specified otherwise.
             if "&" not in after:
                 after = after.replace("*", "* &")
 
-            # Group base glyphs by anchor
-            glyphs = {}
-            for glyph, anchor in glyph_anchor_pair:
-                glyphs.setdefault(anchor.key, [anchor, []])[1].append(glyph)
+            baseGlyphNames = " ".join([s.name for s in statements])
+            marks = ast.MarkClassName(self.context.markClasses[anchorKey]).asFea()
 
-            for anchor, bases in glyphs.values():
-                bases = " ".join(bases)
-                marks = ast.GlyphClass(
-                    self.context.markClasses[anchor.key].glyphs.keys()
-                ).asFea()
+            # Replace * with base glyph names
+            contextual = after.replace("*", f"[{baseGlyphNames}]")
 
-                # Replace * with base glyph names
-                contextual = after.replace("*", f"[{bases}]")
+            # Replace & with mark glyph names
+            refLkpName = f"ContextualMark_{len(refLkps)}"
+            contextual = contextual.replace("&", f"{marks}' lookup {refLkpName}")
+            ctxLkp.statements.append(ast.Comment(f"pos {contextual};"))
 
-                # Replace & with mark glyph names
-                contextual = contextual.replace("&", f"{marks}' lookup {lookupname}")
-                lkp.statements.append(ast.Comment(f"pos {contextual}; # {anchor.name}"))
-
-        lookups.extend(dispatch_lookups.values())
-        return feature, lookups
+            # Then make the non-contextual lookup it references
+            refLkp = ast.LookupBlock(refLkpName)
+            refLkp.statements = [s.asAST() for s in statements]
+            refLkps.append(refLkp)
 
     def _makeMkmkFeature(self, include):
         feature = ast.FeatureBlock("mkmk")
@@ -986,6 +1023,15 @@ def _makeFeatures(self):
         )
         ctx.markToMarkAttachments = self._makeMarkToMarkAttachments()
 
+        baseClass = self.context.gdefClasses.base
+        ctx.contextualMarkToBaseAnchors = self._makeContextualAttachments(baseClass)
+
+        ligatureClass = self.context.gdefClasses.ligature
+        ctx.contextualMarkToLigaAnchors = self._makeContextualAttachments(
+            ligatureClass,
+            True,
+        )
+
         abvmGlyphs, notAbvmGlyphs = self._getAbvmGlyphs()
 
         def isAbvm(glyphName):
@@ -998,8 +1044,7 @@ def isNotAbvm(glyphName):
         lookups = []
         todo = ctx.todo
         if "mark" in todo:
-            mark = self._makeMarkFeature(include=isNotAbvm)
-            mark, markLookups = self._makeContextualMarkFeature(mark)
+            mark, markLookups = self._makeMarkFeature(include=isNotAbvm)
             if mark is not None:
                 features["mark"] = mark
                 lookups.extend(markLookups)
diff --git a/tests/featureWriters/markFeatureWriter_test.py b/tests/featureWriters/markFeatureWriter_test.py
index a0566792..0b21dda6 100644
--- a/tests/featureWriters/markFeatureWriter_test.py
+++ b/tests/featureWriters/markFeatureWriter_test.py
@@ -1748,8 +1748,8 @@ def test_contextual_anchors(self, FontClass):
             "    lookupflag UseMarrkFilteringSet [twodotshorizontalbelow];\n"
             "    # reh-ar * behDotess-ar.medi &\n"
             "    pos reh-ar [behDotless-ar.init] behDotess-ar.medi"
-            " [dotbelow-ar twodotsverticalbelow-ar twodotshorizontalbelow-ar]'"
-            " lookup ContextualMark_0; # *bottom.twodots\n"
+            " @MC_bottom'"
+            " lookup ContextualMark_0;\n"
             "} ContextualMarkDispatch_0;\n"
         )
 
@@ -1759,8 +1759,8 @@ def test_contextual_anchors(self, FontClass):
             "    lookupflag UseMarrkFilteringSet [twodotsverticalbelow];\n"
             "    # reh-ar *\n"
             "    pos reh-ar [behDotless-ar.init behDotless-ar.init.alt]"
-            " [dotbelow-ar twodotsverticalbelow-ar twodotshorizontalbelow-ar]'"
-            " lookup ContextualMark_1; # *bottom.vtwodots\n"
+            " @MC_bottom'"
+            " lookup ContextualMark_1;\n"
             "} ContextualMarkDispatch_1;\n"
         )
 
@@ -1769,8 +1769,8 @@ def test_contextual_anchors(self, FontClass):
             "lookup ContextualMarkDispatch_2 {\n"
             "    # reh-ar *\n"
             "    pos reh-ar [behDotless-ar.init]"
-            " [dotbelow-ar twodotsverticalbelow-ar twodotshorizontalbelow-ar]'"
-            " lookup ContextualMark_2; # *bottom\n"
+            " @MC_bottom'"
+            " lookup ContextualMark_2;\n"
             "} ContextualMarkDispatch_2;\n"
         )
 
@@ -1835,7 +1835,119 @@ def test_contextual_anchors_no_mark_feature(self, testufo):
 
             lookup ContextualMarkDispatch_0 {
                 # f *
-                pos f [a] [acutecomb tildecomb]' lookup ContextualMark_0; # *top
+                pos f [a] @MC_top' lookup ContextualMark_0;
+            } ContextualMarkDispatch_0;
+
+            feature mark {
+                lookup mark2base;
+                lookup mark2liga;
+                lookup ContextualMarkDispatch_0;
+            } mark;
+
+            feature mkmk {
+                lookup mark2mark_top {
+                    @MFS_mark2mark_top = [acutecomb tildecomb];
+                    lookupflag UseMarkFilteringSet @MFS_mark2mark_top;
+                    pos mark tildecomb
+                        <anchor 100 300> mark @MC_top;
+                } mark2mark_top;
+
+            } mkmk;
+            """
+        )
+
+    def test_contextual_liga_anchors(self, testufo):
+        a = testufo["a"]
+
+        a.appendAnchor({"name": "*top", "x": 200, "y": 200, "identifier": "*top"})
+        a.lib[OBJECT_LIBS_KEY] = {
+            "*top": {
+                "GPOS_Context": "f *",
+            },
+        }
+
+        fi = testufo["f_i"]
+        fi.appendAnchor(
+            {"name": "*top_1.tilde", "x": 300, "y": 500, "identifier": "*top_1.tilde"}
+        )
+        fi.appendAnchor(
+            {"name": "*top_1.acute", "x": 200, "y": 300, "identifier": "*top_1.acute"}
+        )
+        fi.lib[OBJECT_LIBS_KEY] = {
+            "*top_1.tilde": {
+                "GPOS_Context": "* tildecomb",
+            },
+            "*top_1.acute": {
+                "GPOS_Context": "* acutecomb",
+            },
+        }
+
+        fl = testufo.newGlyph("f_l")
+        fl.appendAnchor({"name": "top_1", "x": 200, "y": 400})
+        fl.appendAnchor({"name": "top_2", "x": 500, "y": 400})
+        fl.appendAnchor({"name": "*top_2", "x": 100, "y": 400, "identifier": "*top_2"})
+        fl.lib[OBJECT_LIBS_KEY] = {
+            "*top_2": {
+                "GPOS_Context": "* tildecomb",
+            },
+        }
+
+        writer = MarkFeatureWriter()
+        feaFile = ast.FeatureFile()
+        assert str(feaFile) == ""
+        assert writer.write(testufo, feaFile)
+
+        assert str(feaFile) == dedent(
+            """\
+            markClass acutecomb <anchor 100 200> @MC_top;
+            markClass tildecomb <anchor 100 200> @MC_top;
+
+            lookup mark2base {
+                pos base a
+                    <anchor 100 200> mark @MC_top;
+            } mark2base;
+
+            lookup mark2liga {
+                pos ligature f_i
+                        <anchor 100 500> mark @MC_top
+                    ligComponent
+                        <anchor 600 500> mark @MC_top;
+                pos ligature f_l
+                        <anchor 200 400> mark @MC_top
+                    ligComponent
+                        <anchor 500 400> mark @MC_top;
+            } mark2liga;
+
+            lookup ContextualMark_0 {
+                pos base a
+                    <anchor 200 200> mark @MC_top;
+            } ContextualMark_0;
+
+            lookup ContextualMark_1 {
+                pos ligature f_i
+                        <anchor 300 500> mark @MC_top
+                    ligComponent
+                        <anchor NULL>;
+                pos ligature f_l
+                        <anchor NULL>
+                    ligComponent
+                        <anchor 100 400> mark @MC_top;
+            } ContextualMark_1;
+
+            lookup ContextualMark_2 {
+                pos ligature f_i
+                        <anchor 200 300> mark @MC_top
+                    ligComponent
+                        <anchor NULL>;
+            } ContextualMark_2;
+
+            lookup ContextualMarkDispatch_0 {
+                # f *
+                pos f [a] @MC_top' lookup ContextualMark_0;
+                # * tildecomb
+                pos [f_i f_l] @MC_top' lookup ContextualMark_1 tildecomb;
+                # * acutecomb
+                pos [f_i] @MC_top' lookup ContextualMark_2 acutecomb;
             } ContextualMarkDispatch_0;
 
             feature mark {

From 9436cd4360d7a007a534793e3f1579fe1b344ec2 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Thu, 5 Sep 2024 21:23:45 +0300
Subject: [PATCH 2/2] markFeatureWriter: Catch and skip contextual anchors with
 no context

Instead of raising KeyError trying to access non existing context.
---
 .../featureWriters/markFeatureWriter.py       |  9 +++-
 .../featureWriters/markFeatureWriter_test.py  | 49 +++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/Lib/ufo2ft/featureWriters/markFeatureWriter.py b/Lib/ufo2ft/featureWriters/markFeatureWriter.py
index 6816beb4..a71a88b4 100644
--- a/Lib/ufo2ft/featureWriters/markFeatureWriter.py
+++ b/Lib/ufo2ft/featureWriters/markFeatureWriter.py
@@ -744,7 +744,14 @@ def _makeContextualAttachments(self, glyphClass, liga=False):
                 # If we are building the mark2base lookup, skip anchors with a number
                 if not liga and anchor.number is not None:
                     continue
-                anchor_context = anchor.libData["GPOS_Context"].strip()
+                anchor_context = anchor.libData.get("GPOS_Context", "").strip()
+                if not anchor_context:
+                    self.log.warning(
+                        "contextual anchor '%s' in glyph '%s' has no context data; skipped",
+                        anchor.name,
+                        glyphName,
+                    )
+                    continue
                 result[anchor_context].append((glyphName, anchor))
         return result
 
diff --git a/tests/featureWriters/markFeatureWriter_test.py b/tests/featureWriters/markFeatureWriter_test.py
index 0b21dda6..e00daa45 100644
--- a/tests/featureWriters/markFeatureWriter_test.py
+++ b/tests/featureWriters/markFeatureWriter_test.py
@@ -1968,6 +1968,55 @@ def test_contextual_liga_anchors(self, testufo):
             """
         )
 
+    def test_contextual_anchor_no_context(self, testufo, caplog):
+        a = testufo["a"]
+        a.appendAnchor({"name": "*top", "x": 200, "y": 200, "identifier": "*top"})
+        a.lib[OBJECT_LIBS_KEY] = {"*top": {"foo": "bar"}}
+
+        writer = MarkFeatureWriter()
+        feaFile = ast.FeatureFile()
+        assert str(feaFile) == ""
+
+        logger = "ufo2ft.featureWriters.markFeatureWriter.MarkFeatureWriter"
+        with caplog.at_level(logging.WARNING, logger=logger):
+            assert writer.write(testufo, feaFile)
+        assert len(caplog.records) == 1
+        assert (
+            "contextual anchor '*top' in glyph 'a' has no context data; skipped"
+            in caplog.text
+        )
+        assert str(feaFile) == dedent(
+            """\
+            markClass acutecomb <anchor 100 200> @MC_top;
+            markClass tildecomb <anchor 100 200> @MC_top;
+
+            feature mark {
+                lookup mark2base {
+                    pos base a
+                        <anchor 100 200> mark @MC_top;
+                } mark2base;
+
+                lookup mark2liga {
+                    pos ligature f_i
+                            <anchor 100 500> mark @MC_top
+                        ligComponent
+                            <anchor 600 500> mark @MC_top;
+                } mark2liga;
+
+            } mark;
+
+            feature mkmk {
+                lookup mark2mark_top {
+                    @MFS_mark2mark_top = [acutecomb tildecomb];
+                    lookupflag UseMarkFilteringSet @MFS_mark2mark_top;
+                    pos mark tildecomb
+                        <anchor 100 300> mark @MC_top;
+                } mark2mark_top;
+
+            } mkmk;
+            """
+        )
+
     def test_ignorable_anchors(self, FontClass):
         dirname = os.path.dirname(os.path.dirname(__file__))
         fontPath = os.path.join(dirname, "data", "IgnoreAnchorsTest-Thin.ufo")