From 4690e770886c47708dc56c24725e50a8b6330bca Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Wed, 7 Feb 2024 16:50:38 -0500 Subject: [PATCH] [read-fonts] Basic closure of glyphs over GSUB This differs from the fonttools implementation slightly, but I'm not sure if those differences are functional or just a result of how fonttools is organized. --- font-test-data/src/lib.rs | 8 + .../test_data/fea/recursive_closure.fea | 15 + .../fea/recursive_closure_glyphs.txt | 7 + .../test_data/ttf/recursive_closure.ttf | Bin 0 -> 144 bytes read-fonts/src/tables/gsub.rs | 2 + read-fonts/src/tables/gsub/closure.rs | 280 ++++++++++++++++++ 6 files changed, 312 insertions(+) create mode 100644 font-test-data/test_data/fea/recursive_closure.fea create mode 100644 font-test-data/test_data/fea/recursive_closure_glyphs.txt create mode 100644 font-test-data/test_data/ttf/recursive_closure.ttf create mode 100644 read-fonts/src/tables/gsub/closure.rs diff --git a/font-test-data/src/lib.rs b/font-test-data/src/lib.rs index 3c04df7f3..beb9124a0 100644 --- a/font-test-data/src/lib.rs +++ b/font-test-data/src/lib.rs @@ -47,6 +47,14 @@ pub static COLRV0V1: &[u8] = include_bytes!("../test_data/ttf/test_glyphs-glyf_c pub static COLRV0V1_VARIABLE: &[u8] = include_bytes!("../test_data/ttf/test_glyphs-glyf_colr_1_variable.ttf"); +pub mod closure { + pub static SIMPLE: &[u8] = include_bytes!("../test_data/ttf/simple_closure.ttf"); + pub static SIMPLE_GLYPHS: &str = include_str!("../test_data/fea/simple_closure_glyphs.txt"); + pub static RECURSIVE: &[u8] = include_bytes!("../test_data/ttf/recursive_closure.ttf"); + pub static RECURSIVE_GLYPHS: &str = + include_str!("../test_data/fea/recursive_closure_glyphs.txt"); +} + pub mod post { #[rustfmt::skip] diff --git a/font-test-data/test_data/fea/recursive_closure.fea b/font-test-data/test_data/fea/recursive_closure.fea new file mode 100644 index 000000000..a22488541 --- /dev/null +++ b/font-test-data/test_data/fea/recursive_closure.fea @@ -0,0 +1,15 @@ 
+feature test { + # this will be ordered first in lookup list but has a rule that will be hit last + lookup three { + sub c by d; + } three; + + lookup one { + sub a by b; + } one; + + lookup two { + sub b by c; + } two; +} test; + diff --git a/font-test-data/test_data/fea/recursive_closure_glyphs.txt b/font-test-data/test_data/fea/recursive_closure_glyphs.txt new file mode 100644 index 000000000..aa46b48c0 --- /dev/null +++ b/font-test-data/test_data/fea/recursive_closure_glyphs.txt @@ -0,0 +1,7 @@ +a +b +c +d +# these two aren't used +e +f diff --git a/font-test-data/test_data/ttf/recursive_closure.ttf b/font-test-data/test_data/ttf/recursive_closure.ttf new file mode 100644 index 0000000000000000000000000000000000000000..bcc4eeaad2b4f45843afa45f113c66592afd355c GIT binary patch literal 144 zcmZQzWME)mWDo$74DP|9PCL~Pgaer}KwJV8XJFuBkYg}lV03Zw31Q%1U;&CS{QnP@ tVJt~4E&+-$Ffao#BLfqV<^bvhss{5pfHWHrgCG+SgVZ34f%xcRi~#Ef3BLdU literal 0 HcmV?d00001 diff --git a/read-fonts/src/tables/gsub.rs b/read-fonts/src/tables/gsub.rs index d2ba61e62..6c3be9f4d 100644 --- a/read-fonts/src/tables/gsub.rs +++ b/read-fonts/src/tables/gsub.rs @@ -8,6 +8,8 @@ pub use super::layout::{ }; use super::layout::{ExtensionLookup, LookupFlag, Subtables}; +#[cfg(feature = "std")] +mod closure; #[cfg(test)] #[path = "../tests/test_gsub.rs"] mod tests; diff --git a/read-fonts/src/tables/gsub/closure.rs b/read-fonts/src/tables/gsub/closure.rs new file mode 100644 index 000000000..0c4e39302 --- /dev/null +++ b/read-fonts/src/tables/gsub/closure.rs @@ -0,0 +1,280 @@ +//! Computing the closure over a set of glyphs +//! +//! This means taking a set of glyphs and updating it to include any other glyphs +//! reachable from those glyphs via substitution, recursively. 
+ +use std::collections::HashSet; + +use font_types::GlyphId; + +use crate::{ + tables::layout::{ExtensionLookup, Subtables}, + FontRead, ReadError, +}; + +use super::{ + AlternateSubstFormat1, Gsub, LigatureSubstFormat1, MultipleSubstFormat1, SingleSubst, + SingleSubstFormat1, SingleSubstFormat2, SubstitutionSubtables, +}; + +/// A trait for tables which participate in closure +pub(crate) trait GlyphClosure { + /// Update the set of glyphs with any glyphs reachable via substitution. + fn update_reachable_glyphs(&self, glyphs: &mut HashSet) -> Result<(), ReadError>; +} + +#[cfg(feature = "std")] +impl<'a> Gsub<'a> { + /// Return the set of glyphs reachable from the input set via any substitution. + pub fn closure_glyphs( + &self, + mut glyphs: HashSet, + ) -> Result, ReadError> { + // we need to do this iteratively, since any glyph found in one pass + // over the lookups could also be the target of substitutions. + + // we always call this once, and then keep calling if it produces + // additional glyphs + let mut prev_glyph_count = glyphs.len(); + self.closure_glyphs_once(&mut glyphs)?; + let mut new_glyph_count = glyphs.len(); + + while prev_glyph_count != new_glyph_count { + prev_glyph_count = new_glyph_count; + self.closure_glyphs_once(&mut glyphs)?; + new_glyph_count = glyphs.len(); + } + + Ok(glyphs) + } + + fn closure_glyphs_once(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + let lookup_list = self.lookup_list()?; + for lookup in lookup_list.lookups().iter() { + let subtables = lookup?.subtables()?; + subtables.update_reachable_glyphs(glyphs)?; + } + Ok(()) + } +} + +impl<'a> GlyphClosure for SubstitutionSubtables<'a> { + fn update_reachable_glyphs(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + match self { + SubstitutionSubtables::Single(tables) => tables.update_reachable_glyphs(glyphs), + SubstitutionSubtables::Multiple(tables) => tables.update_reachable_glyphs(glyphs), + SubstitutionSubtables::Alternate(tables) => 
tables.update_reachable_glyphs(glyphs), + SubstitutionSubtables::Ligature(tables) => tables.update_reachable_glyphs(glyphs), + _ => Ok(()), + } + } +} + +impl<'a, T: FontRead<'a> + GlyphClosure, Ext: ExtensionLookup<'a, T>> GlyphClosure + for Subtables<'a, T, Ext> +{ + fn update_reachable_glyphs(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + self.iter() + .try_for_each(|t| t?.update_reachable_glyphs(glyphs)) + } +} + +impl<'a> GlyphClosure for SingleSubst<'a> { + fn update_reachable_glyphs(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + for (target, replacement) in self.iter_subs()? { + if glyphs.contains(&target) { + glyphs.insert(replacement); + } + } + Ok(()) + } +} + +impl<'a> SingleSubst<'a> { + fn iter_subs(&self) -> Result + '_, ReadError> { + let (left, right) = match self { + SingleSubst::Format1(t) => (Some(t.iter_subs()?), None), + SingleSubst::Format2(t) => (None, Some(t.iter_subs()?)), + }; + Ok(left + .into_iter() + .flatten() + .chain(right.into_iter().flatten())) + } +} + +impl<'a> SingleSubstFormat1<'a> { + fn iter_subs(&self) -> Result + '_, ReadError> { + let delta = self.delta_glyph_id(); + let coverage = self.coverage()?; + Ok(coverage.iter().filter_map(move |gid| { + let raw = (gid.to_u16() as i32).checked_add(delta as i32); + let raw = raw.and_then(|raw| u16::try_from(raw).ok())?; + Some((gid, GlyphId::new(raw))) + })) + } +} + +impl<'a> SingleSubstFormat2<'a> { + fn iter_subs(&self) -> Result + '_, ReadError> { + let coverage = self.coverage()?; + let subs = self.substitute_glyph_ids(); + Ok(coverage.iter().zip(subs.iter().map(|id| id.get()))) + } +} + +impl<'a> GlyphClosure for MultipleSubstFormat1<'a> { + fn update_reachable_glyphs(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + let coverage = self.coverage()?; + let sequences = self.sequences(); + for (gid, replacements) in coverage.iter().zip(sequences.iter()) { + let replacements = replacements?; + if glyphs.contains(&gid) { + glyphs.extend( + replacements 
+ .substitute_glyph_ids() + .iter() + .map(|gid| gid.get()), + ); + } + } + Ok(()) + } +} + +impl<'a> GlyphClosure for AlternateSubstFormat1<'a> { + fn update_reachable_glyphs(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + let coverage = self.coverage()?; + let alts = self.alternate_sets(); + for (gid, alts) in coverage.iter().zip(alts.iter()) { + let alts = alts?; + if glyphs.contains(&gid) { + glyphs.extend(alts.alternate_glyph_ids().iter().map(|gid| gid.get())); + } + } + Ok(()) + } +} + +impl<'a> GlyphClosure for LigatureSubstFormat1<'a> { + fn update_reachable_glyphs(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + let coverage = self.coverage()?; + let ligs = self.ligature_sets(); + for (gid, lig_set) in coverage.iter().zip(ligs.iter()) { + let lig_set = lig_set?; + if glyphs.contains(&gid) { + for lig in lig_set.ligatures().iter() { + let lig = lig?; + if lig + .component_glyph_ids() + .iter() + .all(|gid| glyphs.contains(&gid.get())) + { + glyphs.insert(lig.ligature_glyph()); + } + } + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use crate::{FontRef, TableProvider}; + + use super::*; + use font_test_data::closure as test_data; + + struct GlyphMap { + to_gid: HashMap<&'static str, GlyphId>, + from_gid: HashMap, + } + + impl GlyphMap { + fn new(raw_order: &'static str) -> GlyphMap { + let to_gid: HashMap<_, _> = raw_order + .split('\n') + .map(|line| line.trim()) + .filter(|line| !(line.starts_with('#') || line.is_empty())) + .enumerate() + .map(|(gid, name)| (name, GlyphId::new(gid.try_into().unwrap()))) + .collect(); + let from_gid = to_gid.iter().map(|(name, gid)| (*gid, *name)).collect(); + GlyphMap { from_gid, to_gid } + } + + fn get_gid(&self, name: &str) -> Option { + self.to_gid.get(name).copied() + } + + fn get_name(&self, gid: GlyphId) -> Option<&str> { + self.from_gid.get(&gid).copied() + } + } + + fn get_gsub(test_data: &'static [u8]) -> Gsub<'_> { + let font = 
FontRef::new(test_data).unwrap(); + font.gsub().unwrap() + } + + fn compute_closure(gsub: &Gsub, glyph_map: &GlyphMap, input: &[&str]) -> HashSet { + let input_glyphs = input + .iter() + .map(|name| glyph_map.get_gid(name).unwrap()) + .collect(); + gsub.closure_glyphs(input_glyphs).unwrap() + } + + /// assert a set of glyph ids matches a slice of names + macro_rules! assert_closure_result { + ($glyph_map:expr, $result:expr, $expected:expr) => { + let result = $result + .iter() + .map(|gid| $glyph_map.get_name(*gid).unwrap()) + .collect::>(); + let expected = $expected.iter().copied().collect::>(); + if expected != result { + let in_output = result.difference(&expected).collect::>(); + let in_expected = expected.difference(&result).collect::>(); + let mut msg = format!("Closure output does not match\n"); + if !in_expected.is_empty() { + msg.push_str(format!("missing {in_expected:?}\n").as_str()); + } + if !in_output.is_empty() { + msg.push_str(format!("unexpected {in_output:?}").as_str()); + } + panic!("{msg}") + } + }; + } + + #[test] + fn smoke_test() { + // tests various lookup types. + // test input is font-test-data/test_data/fea/simple_closure.fea + let gsub = get_gsub(test_data::SIMPLE); + let glyph_map = GlyphMap::new(test_data::SIMPLE_GLYPHS); + let result = compute_closure(&gsub, &glyph_map, &["a"]); + + assert_closure_result!( + glyph_map, + result, + &["a", "A", "b", "c", "d", "a_a", "a.1", "a.2"] + ); + } + + #[test] + fn recursive() { + // a scenario in which one substitution adds glyphs that trigger additional + // substitutions. + // + // test input is font-test-data/test_data/fea/recursive_closure.fea + let gsub = get_gsub(test_data::RECURSIVE); + let glyph_map = GlyphMap::new(test_data::RECURSIVE_GLYPHS); + let result = compute_closure(&gsub, &glyph_map, &["a"]); + assert_closure_result!(glyph_map, result, &["a", "b", "c", "d"]); + } +}