From aa290fca06b5d5294cb3a60b384c5a51510ecb7f Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Fri, 9 Feb 2024 15:44:16 -0500 Subject: [PATCH] [read-fonts] Only inspect reachable lookups for closure Previously we just looked at every lookup in the lookup list when computing the closure. This sounds okay at first glance, but it overlooks the fact that the lookup list can include lookups that are only reachable via other contextual lookups. This introduces a much more rigorous approach, where on each run we recalculate the set of reachable lookups, by checking to see if the current set of glyphs matches any (chain) contextual rules, and then adding the appropriate lookups from those rules. --- font-test-data/src/lib.rs | 7 + .../test_data/fea/context_closure.fea | 28 ++ .../test_data/fea/context_closure_glyphs.txt | 24 ++ .../fea/recursive_context_closure.fea | 12 + .../fea/recursive_context_closure_glyphs.txt | 6 + .../test_data/ttf/context_closure.ttf | Bin 0 -> 464 bytes .../ttf/recursive_context_closure.ttf | Bin 0 -> 224 bytes read-fonts/src/tables/gsub/closure.rs | 307 +++++++++++++++++- 8 files changed, 379 insertions(+), 5 deletions(-) create mode 100644 font-test-data/test_data/fea/context_closure.fea create mode 100644 font-test-data/test_data/fea/context_closure_glyphs.txt create mode 100644 font-test-data/test_data/fea/recursive_context_closure.fea create mode 100644 font-test-data/test_data/fea/recursive_context_closure_glyphs.txt create mode 100644 font-test-data/test_data/ttf/context_closure.ttf create mode 100644 font-test-data/test_data/ttf/recursive_context_closure.ttf diff --git a/font-test-data/src/lib.rs b/font-test-data/src/lib.rs index beb9124a0..60464c44a 100644 --- a/font-test-data/src/lib.rs +++ b/font-test-data/src/lib.rs @@ -53,6 +53,13 @@ pub mod closure { pub static RECURSIVE: &[u8] = include_bytes!("../test_data/ttf/recursive_closure.ttf"); pub static RECURSIVE_GLYPHS: &str = include_str!("../test_data/fea/recursive_closure_glyphs.txt"); + pub static CONTEXTUAL: &[u8] = include_bytes!("../test_data/ttf/context_closure.ttf"); + pub static CONTEXTUAL_GLYPHS: &str = + include_str!("../test_data/fea/context_closure_glyphs.txt"); + pub static RECURSIVE_CONTEXTUAL: &[u8] = + include_bytes!("../test_data/ttf/recursive_context_closure.ttf"); + pub static RECURSIVE_CONTEXTUAL_GLYPHS: &str = + include_str!("../test_data/fea/recursive_context_closure_glyphs.txt"); } pub mod post { diff --git a/font-test-data/test_data/fea/context_closure.fea b/font-test-data/test_data/fea/context_closure.fea new file mode 100644 index 000000000..bee42d62b --- /dev/null +++ b/font-test-data/test_data/fea/context_closure.fea @@ -0,0 +1,28 @@ +feature SUB6 { + lookup GSUB6f1 { + sub one two three' four' five six seven by X; + sub two one three' four' six five seven by Y; + } GSUB6f1; + + # format 2 is generally less efficient than format 3 and hard to generate + # (as in, I'm not aware of any input that will compile to format 2) + lookup GSUB6f3 { + sub [space comma semicolon] e' by e.2; + } GSUB6f3; +} SUB6; + +# can't declare in a feature or it gets added to the feature +lookup MY_RULES { + sub f by f.2; +} MY_RULES; + +feature SUB5 { + lookup GSUB5f1 { + sub a' b' by a_b; + sub c' d' by c_d; + } GSUB5f1; + + lookup GSUB5f3 { + sub f' lookup MY_RULES g'; + } GSUB5f3; +} SUB5; diff --git a/font-test-data/test_data/fea/context_closure_glyphs.txt b/font-test-data/test_data/fea/context_closure_glyphs.txt new file mode 100644 index 000000000..4d3e2a03e --- /dev/null +++ b/font-test-data/test_data/fea/context_closure_glyphs.txt @@ -0,0 +1,24 @@ +.notdef +space +comma +semicolon +one +two +three +four +five +six +seven +X +Y +a +b +c +d +e +f +g +e.2 +f.2 +a_b +c_d diff --git a/font-test-data/test_data/fea/recursive_context_closure.fea b/font-test-data/test_data/fea/recursive_context_closure.fea new file mode 100644 index 000000000..c55101968 --- /dev/null +++ b/font-test-data/test_data/fea/recursive_context_closure.fea @@ -0,0 +1,12 @@ +feature test { + + lookup first { + sub a b' c by B; + sub a B.2' by B.3; + } first; + + lookup second { + sub a B' c by B.2; + } second; + +} test; diff --git a/font-test-data/test_data/fea/recursive_context_closure_glyphs.txt b/font-test-data/test_data/fea/recursive_context_closure_glyphs.txt new file mode 100644 index 000000000..0e0722e2a --- /dev/null +++ b/font-test-data/test_data/fea/recursive_context_closure_glyphs.txt @@ -0,0 +1,6 @@ +a +b +c +B +B.2 +B.3 diff --git a/font-test-data/test_data/ttf/context_closure.ttf b/font-test-data/test_data/ttf/context_closure.ttf new file mode 100644 index 0000000000000000000000000000000000000000..165207af81f9ea33bc1fc147501d947d09855a40 GIT binary patch literal 464 zcmZvYJ5Iwu5Qe{5$Btvb2?jY0Lez)~3Ca`*K|{r(;}jI!f`)>FP;v-PfJDtH!arkY z3q;`^&;Od)#QZ?4zQ0I!{(4$1tY~3l&O-2=+u`6bSy$UOH*T~W7joi7zT-EPk qv{FoS=kGhb-rwDNsaxqx?*DdQieCgj&r^$W!oE_=;QS`W9{d7kY!lxA literal 0 HcmV?d00001 diff --git a/font-test-data/test_data/ttf/recursive_context_closure.ttf b/font-test-data/test_data/ttf/recursive_context_closure.ttf new file mode 100644 index 0000000000000000000000000000000000000000..d68d7646df12daa371379d287a7d0a6f3a75e473 GIT binary patch literal 224 zcmYj~u?@mN3`L*sjwnjS0Ehu7Sb#1<(4+%8S|%U^uofi@l{S1|WC>Ze?C0-q3^3NJ zoG-WjJ$(*pH_ER#bZpsU$J217rHC-kbz|e>If_B5S)}c`^P Gsub<'a> { } fn closure_glyphs_once(&self, glyphs: &mut HashSet) -> Result<(), ReadError> { + let lookups_to_use = self.find_reachable_lookups(glyphs)?; let lookup_list = self.lookup_list()?; - for lookup in lookup_list.lookups().iter() { + for (i, lookup) in lookup_list.lookups().iter().enumerate() { + if !lookups_to_use.contains(&(i as u16)) { + continue; + } let subtables = lookup?.subtables()?; subtables.add_reachable_glyphs(glyphs)?; } Ok(()) } + + fn find_reachable_lookups(&self, glyphs: &HashSet) -> Result, ReadError> { + let feature_list = self.feature_list()?; + let lookup_list = self.lookup_list()?; + // first we want to get the lookups that are directly referenced by a feature + let mut lookup_ids = HashSet::with_capacity(lookup_list.lookup_count() as _); + for rec in feature_list.feature_records() { + let feat = rec.feature(feature_list.offset_data())?; + lookup_ids.extend(feat.lookup_list_indices().iter().map(|idx| idx.get())); + } + + // and now we need to add lookups referenced by contextual lookups, + // IFF they are reachable via the current set of glyphs: + for lookup in lookup_list.lookups().iter() { + let subtables = lookup?.subtables()?; + match subtables { + SubstitutionSubtables::Contextual(tables) => tables + .iter() + .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)), + SubstitutionSubtables::ChainContextual(tables) => tables + .iter() + .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)), + _ => Ok(()), + }?; + } + Ok(lookup_ids) + } } impl<'a> GlyphClosure for SubstitutionSubtables<'a> { @@ -202,6 +237,226 @@ impl GlyphClosure for ReverseChainSingleSubstFormat1<'_> { } } +impl SequenceContext<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + match self { + SequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups), + SequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups), + SequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups), + } + } +} + +impl SequenceContextFormat1<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + let coverage = self.coverage()?; + for seq in coverage + .iter() + .zip(self.seq_rule_sets().iter()) + .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(&gid))) + { + for rule in seq?.seq_rules().iter() { + let rule = rule?; + if rule + .input_sequence() + .iter() + .all(|gid| glyphs.contains(&gid.get())) + { + lookups.extend( + rule.seq_lookup_records() + .iter() + .map(|rec| rec.lookup_list_index()), + ); + } + } + } + Ok(()) + } +} + +impl SequenceContextFormat2<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + let classdef = self.class_def()?; + let our_classes = make_class_set(glyphs, &classdef); + for seq in self + .class_seq_rule_sets() + .iter() + .enumerate() + .filter_map(|(i, seq)| seq.filter(|_| our_classes.contains(&(i as u16)))) + { + for rule in seq?.class_seq_rules().iter() { + let rule = rule?; + if rule + .input_sequence() + .iter() + .all(|class_id| our_classes.contains(&class_id.get())) + { + lookups.extend( + rule.seq_lookup_records() + .iter() + .map(|rec| rec.lookup_list_index()), + ) + } + } + } + Ok(()) + } +} + +impl SequenceContextFormat3<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + for coverage in self.coverages().iter() { + if !coverage?.iter().any(|gid| glyphs.contains(&gid)) { + return Ok(()); + } + } + lookups.extend( + self.seq_lookup_records() + .iter() + .map(|rec| rec.lookup_list_index()), + ); + Ok(()) + } +} + +impl ChainedSequenceContext<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + match self { + ChainedSequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups), + ChainedSequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups), + ChainedSequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups), + } + } +} + +impl ChainedSequenceContextFormat1<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + let coverage = self.coverage()?; + for seq in coverage + .iter() + .zip(self.chained_seq_rule_sets().iter()) + .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(&gid))) + { + for rule in seq?.chained_seq_rules().iter() { + let rule = rule?; + if rule + .input_sequence() + .iter() + .chain(rule.backtrack_sequence()) + .chain(rule.lookahead_sequence()) + .all(|gid| glyphs.contains(&gid.get())) + { + lookups.extend( + rule.seq_lookup_records() + .iter() + .map(|rec| rec.lookup_list_index()), + ); + } + } + } + Ok(()) + } +} + +impl ChainedSequenceContextFormat2<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + let input = self.input_class_def()?; + let backtrack = self.backtrack_class_def()?; + let lookahead = self.lookahead_class_def()?; + + let input_classes = make_class_set(glyphs, &input); + let backtrack_classes = make_class_set(glyphs, &backtrack); + let lookahead_classes = make_class_set(glyphs, &lookahead); + for seq in self + .chained_class_seq_rule_sets() + .iter() + .enumerate() + .filter_map(|(i, seq)| seq.filter(|_| input_classes.contains(&(i as u16)))) + { + for rule in seq?.chained_class_seq_rules().iter() { + let rule = rule?; + if rule + .input_sequence() + .iter() + .all(|cls| input_classes.contains(&cls.get())) + && rule + .backtrack_sequence() + .iter() + .all(|cls| backtrack_classes.contains(&cls.get())) + && rule + .lookahead_sequence() + .iter() + .all(|cls| lookahead_classes.contains(&cls.get())) + { + lookups.extend( + rule.seq_lookup_records() + .iter() + .map(|rec| rec.lookup_list_index()), + ) + } + } + } + Ok(()) + } +} + +impl ChainedSequenceContextFormat3<'_> { + fn add_reachable_lookups( + &self, + glyphs: &HashSet, + lookups: &mut HashSet, + ) -> Result<(), ReadError> { + for coverage in self + .backtrack_coverages() + .iter() + .chain(self.input_coverages().iter()) + .chain(self.lookahead_coverages().iter()) + { + if !coverage?.iter().any(|gid| glyphs.contains(&gid)) { + return Ok(()); + } + } + lookups.extend( + self.seq_lookup_records() + .iter() + .map(|rec| rec.lookup_list_index()), + ); + Ok(()) + } +} + +fn make_class_set(glyphs: &HashSet, classdef: &ClassDef) -> HashSet { + glyphs.iter().map(|gid| classdef.get(*gid)).collect() +} + #[cfg(test)] mod tests { use std::collections::HashMap; @@ -300,4 +555,46 @@ mod tests { let result = compute_closure(&gsub, &glyph_map, &["a"]); assert_closure_result!(glyph_map, result, &["a", "b", "c", "d"]); } + + #[test] + fn contextual_lookups() { + let gsub = get_gsub(test_data::CONTEXTUAL); + let glyph_map = GlyphMap::new(test_data::CONTEXTUAL_GLYPHS); + + // these match the lookups but not the context + let nop = compute_closure(&gsub, &glyph_map, &["three", "four", "e", "f"]); + assert_closure_result!(glyph_map, nop, &["three", "four", "e", "f"]); + + let gsub6f1 = compute_closure( + &gsub, + &glyph_map, + &["one", "two", "three", "four", "five", "six", "seven"], + ); + assert_closure_result!( + glyph_map, + gsub6f1, + &["one", "two", "three", "four", "five", "six", "seven", "X", "Y"] + ); + + let gsub6f3 = compute_closure(&gsub, &glyph_map, &["space", "e"]); + assert_closure_result!(glyph_map, gsub6f3, &["space", "e", "e.2"]); + + let gsub5f3 = compute_closure(&gsub, &glyph_map, &["f", "g"]); + assert_closure_result!(glyph_map, gsub5f3, &["f", "g", "f.2"]); + } + + #[test] + fn recursive_context() { + let gsub = get_gsub(test_data::RECURSIVE_CONTEXTUAL); + let glyph_map = GlyphMap::new(test_data::RECURSIVE_CONTEXTUAL_GLYPHS); + + let nop = compute_closure(&gsub, &glyph_map, &["b", "B"]); + assert_closure_result!(glyph_map, nop, &["b", "B"]); + + let full = compute_closure(&gsub, &glyph_map, &["a", "b", "c"]); + assert_closure_result!(glyph_map, full, &["a", "b", "c", "B", "B.2", "B.3"]); + + let intermediate = compute_closure(&gsub, &glyph_map, &["a", "B.2"]); + assert_closure_result!(glyph_map, intermediate, &["a", "B.2", "B.3"]); + } }