diff --git a/glyphs-reader/Cargo.toml b/glyphs-reader/Cargo.toml index e91ed7014..410cd46a0 100644 --- a/glyphs-reader/Cargo.toml +++ b/glyphs-reader/Cargo.toml @@ -35,10 +35,3 @@ bincode.workspace = true [dev-dependencies] pretty_assertions.workspace = true rstest.workspace = true - -[build-dependencies] -quick-xml = "0.36" -smol_str.workspace = true -serde.workspace = true -thiserror.workspace = true -bincode.workspace = true diff --git a/glyphs-reader/build.rs b/glyphs-reader/build.rs deleted file mode 100644 index 216b13957..000000000 --- a/glyphs-reader/build.rs +++ /dev/null @@ -1,34 +0,0 @@ -use std::env; -use std::path::Path; - -include!("src/glyphdata/glyphdata_impl.rs"); - -fn parse_xml_files() -> Result, GlyphDataError> { - let mut one = parse_xml_file("data/GlyphData.xml")?; - let two = parse_xml_file("data/GlyphData_Ideographs.xml")?; - one.extend(two); - Ok(one) -} - -fn parse_xml_file(path: &str) -> Result, GlyphDataError> { - let Ok(bytes) = std::fs::read(path) else { - panic!("failed to read path '{path}'"); - }; - parse_entries(&bytes) -} - -// tell cargo when to rerun this script -fn register_dependencies() { - println!("cargo::rerun-if-changed=data"); - println!("cargo::rerun-if-changed=src/glyphdata/glyphdata_impl.rs"); -} - -fn main() { - let out_dir = env::var_os("OUT_DIR").unwrap(); - let dest_path = Path::new(&out_dir).join("glyphdata.bin"); - let parsed = parse_xml_files().expect("failed to parse GlyphData xml files"); - let bytes = bincode::serialize(&parsed).expect("bincode failed"); - std::fs::write(dest_path, bytes).unwrap(); - - register_dependencies() -} diff --git a/glyphs-reader/data/update.py b/glyphs-reader/data/update.py index d09c66501..9fb433a02 100644 --- a/glyphs-reader/data/update.py +++ b/glyphs-reader/data/update.py @@ -1,43 +1,188 @@ -"""Update bundled xml files +"""Update bundled data derived from glyphsLib GlyphData.xml and GlyphData_Ideographs.xml. -We try to match the behaviour of the python toolchain, so we want to ship the -same data files as are currently bundled in glyphsLib. This script copies those -files out of the currently active version of glyphsLib. +This script copies files out of the currently active version of glyphsLib and generates +Rust code for efficient access to the default data. Override files must be loaded separately +from XML. We only generate code for the fields we actively use. Usage: - python data/update.py + python glyphs-reader/data/update.py """ +import dataclasses +from dataclasses import dataclass import glyphsLib from importlib import resources -import os -import shutil +from io import StringIO +from lxml import etree +from pathlib import Path +from textwrap import dedent +from typing import Optional, Tuple -def script_dir(): - return os.path.dirname(os.path.abspath(__file__)) -def get_data_file(filepath): - return resources.files(glyphsLib).joinpath("data").joinpath(filepath) +@dataclass(frozen=True) +class GlyphInfo: + codepoint: Optional[int] + name: str + category: str + subcategory: Optional[str] -def copy_data_files(): - target_dir = script_dir() - for target in ["GlyphData.xml", "GlyphData_Ideographs.xml"]: - file = get_data_file(target) - target = os.path.join(target_dir, target) - with file.open("rb") as source, open(target, "wb") as dest: - shutil.copyfileobj(source, dest) +def codename(name: Optional[str]) -> Optional[str]: + if name is None: + return None + return name.replace(" ", "") -def write_version_file(): - version = glyphsLib.__version__ - with open(os.path.join(script_dir(), 'VERSION'), 'w') as f: - f.write(f"XML files copied from glyphsLib version {version}.\n" - "(this file generated by update.py)\n") -def main(_): - copy_data_files() - write_version_file() +def read_glyph_info(file: str) -> Tuple[GlyphInfo]: + file = resources.files(glyphsLib).joinpath("data").joinpath(file) + with open(file) as f: + tree = etree.parse(f) + + by_name = {} + + # Do a full pass to collect names + for e in tree.xpath("//glyph"): + info = GlyphInfo( + e.attrib.get("unicode", None), + e.attrib["name"], + codename(e.attrib["category"]), + codename(e.attrib.get("subCategory", None)), + ) + if info.name not in by_name: + by_name[info.name] = info + else: + print(f"We've already seen {info.name}!") + + # Then add alt_names where they don't overlap names + for e in tree.xpath("//glyph[@altNames]"): + for alt_name in e.attrib["altNames"].split(","): + if alt_name in by_name: + print(f'Ignoring alt name "{alt_name}", already taken') + continue + by_name[alt_name] = dataclasses.replace( + by_name[e.attrib["name"]], name=alt_name, codepoint=None + ) + + return tuple(by_name.values()) + + +def main(): + glyph_infos = sorted( + set(read_glyph_info("GlyphData.xml")) + | set(read_glyph_info("GlyphData_Ideographs.xml")), + key=lambda g: g.name, + ) + names = {g.name for g in glyph_infos} + categories = {g.category for g in glyph_infos} + subcategories = {g.subcategory for g in glyph_infos if g.subcategory is not None} + assert len(names) == len(glyph_infos), "Names aren't unique?" + codepoints = {} + for i, gi in enumerate(glyph_infos): + if gi.codepoint is None: + continue + codepoint = int(gi.codepoint, 16) + if codepoint not in codepoints: + codepoints[codepoint] = i + else: + print( + f"Multiple names are assigned 0x{codepoint:04x}, using the first one we saw" + ) + + dest_file = Path(__file__).parent.parent / "src" / "glyphslib_data.rs" + + with open(dest_file, "w") as f: + f.write( + f"//! Glyph data generated from glyphsLib {glyphsLib.__version__} by {Path(__file__).name}\n" + ) + f.write("//!\n") + f.write(f"//! {len(glyph_infos)} glyph metadata records taken from glyphsLib\n") + + f.write( + dedent( + """ + use std::str::FromStr; + use smol_str::SmolStr; + use crate::glyphdata::GlyphInfo; + + /// The primary category for a given glyph + /// + /// Generated to ensure it matches the glyphsLib dataset. + /// + /// These categories are not the same as the unicode character categories. + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] + #[repr(u8)] + pub enum Category { + """ + ) + ) + for category in sorted(categories): + f.write(f" {category},\n") + f.write("}\n") + + f.write("impl FromStr for Category {\n") + f.write(" type Err = SmolStr;\n\n") + f.write(" fn from_str(s: &str) -> Result {\n") + f.write(" match s {\n") + for category in sorted(categories): + f.write(f' "{category}" => Ok(Self::{category}),\n') + f.write(f" _ => Err(s.into()),\n") + f.write(" }\n") + f.write(" }\n") + f.write("}\n") + f.write("\n") + + f.write( + dedent( + """ + /// The secondary category for a given glyph + /// + /// Generated to ensure it matches the glyphsLib dataset. + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] + #[repr(u8)] + pub enum Subcategory { + """ + ) + ) + for subcategory in sorted(subcategories): + f.write(f" {subcategory},\n") + f.write("}\n\n") + + f.write("impl FromStr for Subcategory {\n") + f.write(" type Err = SmolStr;\n\n") + f.write(" fn from_str(s: &str) -> Result {\n") + f.write(" match s {\n") + for subcategory in sorted(subcategories): + f.write(f' "{subcategory}" => Ok(Subcategory::{subcategory}),\n') + f.write(f" _ => Err(s.into()),\n") + f.write(" }\n") + f.write(" }\n") + f.write("}\n") + f.write("\n") + + f.write("// Sorted by name, has unique names, therefore safe to bsearch\n") + f.write("pub(crate) const GLYPH_INFO: &[GlyphInfo] = &[\n") + for gi in glyph_infos: + codepoint = "None" + if gi.codepoint is not None: + codepoint = f"Some(0x{gi.codepoint})" + subcategory = "None" + if gi.subcategory is not None: + subcategory = f"Some(Subcategory::{gi.subcategory})" + f.write( + f' GlyphInfo::new("{gi.name}", Category::{gi.category}, {subcategory}, {codepoint}),\n' + ) + + f.write("];\n") + + f.write( + "// Sorted by codepoint, has unique codepoints, therefore safe to bsearch\n" + ) + f.write("pub(crate) const CODEPOINT_TO_INFO_IDX: &[(u32, usize)] = &[\n") + for codepoint, i in sorted(codepoints.items()): + f.write(f" (0x{codepoint:04x}, {i}), // {glyph_infos[i].name}\n") + + f.write("];\n") if __name__ == "__main__": - main(None) + main() diff --git a/glyphs-reader/src/font.rs b/glyphs-reader/src/font.rs index 569f84760..d624b4f26 100644 --- a/glyphs-reader/src/font.rs +++ b/glyphs-reader/src/font.rs @@ -11,7 +11,8 @@ use std::hash::Hash; use std::str::FromStr; use std::{fs, path}; -use crate::glyphdata::{Category, GlyphData, Subcategory}; +use crate::glyphdata::GlyphData; +use crate::{Category, Subcategory}; use ascii_plist_derive::FromPlist; use fontdrasil::types::WidthClass; use kurbo::{Affine, Point, Vec2}; @@ -205,7 +206,7 @@ pub struct Glyph { /// The right kerning group pub right_kern: Option, pub category: Option, - pub sub_category: Subcategory, + pub sub_category: Option, } impl Glyph { @@ -214,7 +215,7 @@ impl Glyph { (self.category, self.sub_category), ( Some(Category::Mark), - Subcategory::Nonspacing | Subcategory::SpacingCombining + Some(Subcategory::Nonspacing) | Some(Subcategory::SpacingCombining) ) ) } @@ -1898,7 +1899,7 @@ impl TryFrom for Layer { impl RawGlyph { // we pass in the radix because it depends on the version, stored in the font struct - fn build(self, codepoint_radix: u32) -> Result { + fn build(self, codepoint_radix: u32, glyph_data: &GlyphData) -> Result { let mut instances = Vec::new(); for layer in self.layers { if layer.is_draft() { @@ -1933,12 +1934,12 @@ impl RawGlyph { .unwrap_or_default(); if category.is_none() || sub_category.is_none() { - if let Some((computed_category, computed_subcategory)) = - get_glyph_category(&self.glyphname, &codepoints) + if let Some((computed_category, computed_subcategory, _)) = + glyph_data.query(&self.glyphname, Some(&codepoints)) { // if they were manually set don't change them, otherwise do category = category.or(Some(computed_category)); - sub_category = sub_category.or(Some(computed_subcategory)); + sub_category = sub_category.or(computed_subcategory); } } @@ -1950,20 +1951,11 @@ impl RawGlyph { right_kern: self.kern_right, unicode: codepoints, category, - sub_category: sub_category.unwrap_or_default(), + sub_category, }) } } -// This will eventually need to be replaced with something that can handle -// custom GlyphData.xml files, as well as handle overrides that are part of the -// glyph source. -fn get_glyph_category(name: &str, codepoints: &BTreeSet) -> Option<(Category, Subcategory)> { - GlyphData::bundled() - .get_glyph(name, Some(codepoints)) - .map(|info| (info.category, info.subcategory)) -} - // https://github.com/googlefonts/glyphsLib/blob/24b4d340e4c82948ba121dcfe563c1450a8e69c9/Lib/glyphsLib/builder/constants.py#L186 #[rustfmt::skip] static GLYPHS_TO_OPENTYPE_LANGUAGE_ID: &[(&str, i32)] = &[ @@ -2239,6 +2231,9 @@ impl TryFrom for Font { from.v2_to_v3_names()?; } + // TODO: this should be provided in a manner that allows for overrides + let glyph_data = GlyphData::glyphs_lib_data(); + let radix = if from.is_v2() { 16 } else { 10 }; let glyph_order = parse_glyph_order(&from); @@ -2277,7 +2272,10 @@ impl TryFrom for Font { let mut glyphs = BTreeMap::new(); for raw_glyph in from.glyphs.into_iter() { - glyphs.insert(raw_glyph.glyphname.clone(), raw_glyph.build(radix)?); + glyphs.insert( + raw_glyph.glyphname.clone(), + raw_glyph.build(radix, &glyph_data)?, + ); } let mut features = Vec::new(); @@ -2615,9 +2613,9 @@ mod tests { default_master_idx, RawAxisUserToDesignMap, RawFeature, RawFont, RawFontMaster, RawUserToDesignMapping, }, - glyphdata::{Category, Subcategory}, + glyphdata::GlyphData, plist::FromPlist, - Font, FontMaster, Node, Shape, + Category, Font, FontMaster, Node, Shape, }; use std::{ collections::{BTreeMap, BTreeSet, HashSet}, @@ -3568,9 +3566,11 @@ mod tests { ..Default::default() }; - let cooked = raw.build(16).unwrap(); - assert_eq!(cooked.category, Some(Category::Letter)); - assert_eq!(cooked.sub_category, Subcategory::None); + let cooked = raw.build(16, &GlyphData::glyphs_lib_data()).unwrap(); + assert_eq!( + (cooked.category, cooked.sub_category), + (Some(Category::Letter), None) + ); } #[test] diff --git a/glyphs-reader/src/glyphdata.rs b/glyphs-reader/src/glyphdata.rs index 6ecfcb8ca..30d9ed092 100644 --- a/glyphs-reader/src/glyphdata.rs +++ b/glyphs-reader/src/glyphdata.rs @@ -3,145 +3,379 @@ //! This module provides access to glyph info extracted from bundled //! (and potentially user-provided) data files. -// NOTE: we define the types and parsing code in a separate file, so that -// we can borrow it in our build.rs script without causing a cycle -mod glyphdata_impl; +use quick_xml::{ + events::{BytesStart, Event}, + Reader, +}; use std::{ - borrow::Cow, - collections::{BTreeSet, HashMap, HashSet}, - path::Path, - sync::OnceLock, + collections::{BTreeSet, HashMap}, + num::ParseIntError, + path::{Path, PathBuf}, + str::FromStr, }; -pub use glyphdata_impl::*; use icu_properties::GeneralCategory; use smol_str::SmolStr; -static BUNDLED_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/glyphdata.bin")); +use crate::{glyphslib_data, Category, Subcategory}; /// A queryable set of glyph data /// -/// This is generally expensive to create, and is intended to be cached, or -/// used behind a OnceCell. It is never modified after initial creation. +/// Always includes static data from glyphsLib. Optionally includes a set of override values as well. +/// +/// Access via [`GlyphData::glyphs_lib_data`] is cheap. Instances created with overrides +/// are more expensive. pub struct GlyphData { - // The info for all the glyphs we know of. - data: Vec, - // the values in all maps are indices into the `data` vec. we use u32 to save space. - name_map: HashMap, - unicode_map: HashMap, - alt_name_map: HashMap, + // Sorted by name, unique names, therefore safe to bsearch + data: &'static [GlyphInfo], + // Sorted by codepoint, unique codepoints, therefore safe to bsearch + codepoint_to_data_index: &'static [(u32, usize)], + + // override-names are preferred to names in data + overrides: Option>, + overrrides_by_codepoint: Option>, } impl GlyphData { - /// Return the default glyph data set, derived from GlyphData.xml files - pub fn bundled() -> &'static GlyphData { - static GLYPH_DATA: OnceLock = OnceLock::new(); - GLYPH_DATA.get_or_init(|| GlyphData::new(None).unwrap()) + /// Return the default glyph data set, derived from Python glyphsLib resources + pub fn glyphs_lib_data() -> Self { + Self { + data: glyphslib_data::GLYPH_INFO, + codepoint_to_data_index: glyphslib_data::CODEPOINT_TO_INFO_IDX, + overrides: None, + overrrides_by_codepoint: None, + } } - /// Create a new data set, optionally loading user provided overrides - pub fn new(user_overrides: Option<&Path>) -> Result { - let user_overrides = user_overrides - .map(|path| { - let bytes = std::fs::read(path).map_err(|err| GlyphDataError::UserFile { - path: path.to_owned(), - reason: err.kind(), - }); - bytes.and_then(|xml| parse_entries(&xml)) - }) - .transpose()?; - let bundled = load_bundled_data(); - let all_entries = match user_overrides { - Some(user_overrides) => merge_data(bundled, user_overrides), - None => bundled, - }; + /// Create a new data set with user provided overrides + pub fn with_override_file(override_file: &Path) -> Result { + let bytes = std::fs::read(override_file).map_err(|err| GlyphDataError::UserFile { + path: override_file.to_owned(), + reason: err.kind(), + })?; + let overrides = parse_entries(&bytes)?; + GlyphData::with_overrides(overrides) + } - Ok(Self::new_impl(all_entries)) + /// Create a new data set with user provided overrides + pub(crate) fn with_overrides( + overrides: HashMap, + ) -> Result { + let override_by_codepoint = overrides + .iter() + .filter_map(|(k, v)| v.codepoint.map(|cp| (cp, k.clone()))) + .collect(); + Ok(Self { + data: glyphslib_data::GLYPH_INFO, + codepoint_to_data_index: glyphslib_data::CODEPOINT_TO_INFO_IDX, + overrides: Some(overrides), + overrrides_by_codepoint: Some(override_by_codepoint), + }) } +} - fn new_impl(entries: Vec) -> Self { - let mut name_map = HashMap::with_capacity(entries.len()); - let mut unicode_map = HashMap::with_capacity(entries.len()); - let mut alt_name_map = HashMap::new(); +/// The subset of GlyphData.xml or GlyphData_Ideographs.xml we care about +#[derive(Clone, Copy, Debug)] +pub(crate) struct GlyphInfo { + name: &'static str, + category: Category, + subcategory: Option, + codepoint: Option, +} - for (i, entry) in entries.iter().enumerate() { - name_map.insert(entry.name.clone(), i as u32); - if let Some(cp) = entry.unicode { - unicode_map.insert(cp, i as _); - } - for alt in &entry.alt_names { - alt_name_map.insert(alt.clone(), i as _); +impl GlyphInfo { + pub(crate) const fn new( + name: &'static str, + category: Category, + subcategory: Option, + codepoint: Option, + ) -> Self { + Self { + name, + category, + subcategory, + codepoint, + } + } +} + +/// The category and subcategory to use when specified by an override +pub(crate) struct GlyphOverride { + category: Category, + subcategory: Option, + codepoint: Option, +} + +#[derive(Clone, Debug, thiserror::Error)] +pub enum GlyphDataError { + #[error("Couldn't read user file at '{path}': '{reason}'")] + UserFile { + path: PathBuf, + reason: std::io::ErrorKind, + }, + #[error("Error parsing XML: '{0}'")] + ReaderError(#[from] quick_xml::Error), + #[error("Error parsing XML attribute: '{0}'")] + XmlAttributeError(#[from] quick_xml::events::attributes::AttrError), + #[error("Unknown category '{0}'")] + InvalidCategory(SmolStr), + #[error("Unknown subcategory '{0}'")] + InvalidSubcategory(SmolStr), + #[error("the XML input did not start with a tag")] + WrongFirstElement, + #[error("Missing required attribute '{missing}' in '{attributes}'")] + MissingRequiredAttribute { + attributes: String, + missing: &'static str, + }, + #[error("Invalid unicode value '{raw}': '{inner}'")] + InvalidUnicode { raw: String, inner: ParseIntError }, + #[error("Unexpected attribute '{0}'")] + UnknownAttribute(String), +} + +impl GlyphDataError { + // a little helper here makes our parsing code cleaner + fn missing_attr(name: &'static str, raw_attrs: &[u8]) -> Self { + let attributes = String::from_utf8_lossy(raw_attrs).into_owned(); + Self::MissingRequiredAttribute { + attributes, + missing: name, + } + } +} + +/// Parse glyph info entries out of a GlyphData xml file. +pub(crate) fn parse_entries(xml: &[u8]) -> Result, GlyphDataError> { + fn check_and_advance_past_preamble(reader: &mut Reader<&[u8]>) -> Result<(), GlyphDataError> { + loop { + let event = reader.read_event()?; + match event { + Event::Comment(_) => (), + Event::Decl(_) => (), + Event::DocType(_) => (), + Event::Start(start) if start.name().as_ref() == b"glyphData" => return Ok(()), + _other => { + return Err(GlyphDataError::WrongFirstElement); + } } } + } - Self { - data: entries, - name_map, - unicode_map, - alt_name_map, + let mut reader = Reader::from_reader(xml); + reader.config_mut().trim_text(true); + + check_and_advance_past_preamble(&mut reader)?; + + let mut by_name = HashMap::new(); + let mut alt_names = Vec::new(); + for result in + iter_rows(&mut reader).map(|row| row.map_err(Into::into).and_then(parse_glyph_xml)) + { + let info = result?; + by_name.insert( + info.name.clone(), + GlyphOverride { + category: info.category, + subcategory: info.subcategory, + codepoint: info.codepoint, + }, + ); + for alt in info.alt_names { + alt_names.push(( + alt, + GlyphOverride { + category: info.category, + subcategory: info.subcategory, + codepoint: None, + }, + )); + } + } + + // apply alts after to ensure they can't steal "real" names + for (name, value) in alt_names { + by_name.entry(name).or_insert(value); + } + + Ok(by_name) +} + +fn iter_rows<'a, 'b: 'a>( + reader: &'b mut Reader<&'a [u8]>, +) -> impl Iterator, quick_xml::Error>> + 'a { + std::iter::from_fn(|| match reader.read_event() { + Err(e) => Some(Err(e)), + Ok(Event::Empty(start)) => Some(Ok(start)), + _ => None, + }) +} + +struct GlyphInfoFromXml { + name: SmolStr, + alt_names: Vec, + category: Category, + subcategory: Option, + codepoint: Option, +} + +fn parse_glyph_xml(item: BytesStart) -> Result { + let mut name = None; + let mut category = None; + let mut subcategory = None; + let mut unicode = None; + let mut alt_names = None; + + for attr in item.attributes() { + let attr = attr?; + let value = attr.unescape_value()?; + match attr.key.as_ref() { + b"name" => name = Some(value), + b"category" => category = Some(value), + b"subCategory" => subcategory = Some(value), + b"unicode" => unicode = Some(value), + b"altNames" => alt_names = Some(value), + b"production" | b"unicodeLegacy" | b"case" | b"direction" | b"script" + | b"description" => (), + other => { + return Err(GlyphDataError::UnknownAttribute( + String::from_utf8_lossy(other).into_owned(), + )) + } } } + // now we've found some values, let's finalize them + let name = name + .map(SmolStr::new) + .ok_or_else(|| GlyphDataError::missing_attr("name", item.attributes_raw()))?; + let category = category + .ok_or_else(|| GlyphDataError::missing_attr("category", item.attributes_raw())) + .and_then(|cat| { + Category::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidCategory) + })?; + let subcategory = subcategory + .map(|cat| Subcategory::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidSubcategory)) + .transpose()?; + let codepoint = unicode + .map(|s| { + u32::from_str_radix(&s, 16).map_err(|inner| GlyphDataError::InvalidUnicode { + raw: s.into_owned(), + inner, + }) + }) + .transpose()?; + let alt_names = alt_names + .map(|names| { + names + .as_ref() + .split(',') + .map(|name| SmolStr::from(name.trim())) + .collect() + }) + .unwrap_or_default(); + + Ok(GlyphInfoFromXml { + name, + alt_names, + category, + subcategory, + codepoint, + }) +} + +impl GlyphData { /// Get the info for the given name/codepoints, attempting to synthesize it if necessary /// - /// If this name or these unicode values were included in the bundled data, - /// that will be returned; otherwise we will attempt to compute the value - /// by performing various heuristics based on the name. + /// Returns, from most to least preferred: + /// + /// 1. The matching override value + /// 1. The matching value from bundled data + /// 1. A computed value based on name heuristics /// // See https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L94 - pub fn get_glyph( + pub fn query( &self, name: &str, codepoints: Option<&BTreeSet>, - ) -> Option> { - if let Some(info) = self.get_by_name(name).or_else(|| { - codepoints - .into_iter() - .flat_map(|cps| cps.iter()) - .find_map(|cp| self.get_by_codepoint(*cp)) - }) { - return Some(Cow::Borrowed(info)); - } - - // we don't have info for this glyph: can we synthesize it? - // TODO: python does production name here. - // see https://github.com/googlefonts/fontc/issues/780 - - let (category, subcategory) = self.construct_category(name)?; - Some(Cow::Owned(GlyphInfo { - name: name.into(), - category, - subcategory, - unicode: None, - production: None, - alt_names: Default::default(), - })) + ) -> Option<(Category, Option, Option)> { + self.query_no_synthesis(name, codepoints) + // we don't have info for this glyph: can we synthesize it? + .or_else(|| self.construct_category(name)) } - /// Look up info for a glyph by name - /// - /// This checks primary names first, and alternates afterwards. + /// As [`Self::query`] but without a fallback to computed values. /// - /// Note: this is only checking the loaded data, it does not handle - /// computing info if it is missing. - fn get_by_name(&self, name: impl AsRef) -> Option<&GlyphInfo> { - let name = name.as_ref(); - self.name_map - .get(name) - .or_else(|| self.alt_name_map.get(name)) - .and_then(|idx| self.data.get(*idx as usize)) + /// Exists to enable result synthesis to query. + fn query_no_synthesis( + &self, + name: &str, + codepoints: Option<&BTreeSet>, + ) -> Option<(Category, Option, Option)> { + // Override? + if let (Some(overrides), Some(overrides_by_codepoint)) = ( + self.overrides.as_ref(), + self.overrrides_by_codepoint.as_ref(), + ) { + let name: SmolStr = name.into(); + let override_result = overrides.get(&name).or_else(|| { + codepoints + .into_iter() + .flat_map(|cps| cps.iter()) + .find_map(|cp: &u32| { + overrides_by_codepoint + .get(cp) + .and_then(|n| overrides.get(n)) + }) + }); + if let Some(override_result) = override_result { + return Some(( + override_result.category, + override_result.subcategory, + override_result.codepoint, + )); + } + } + + // No override, perhaps we have a direct answer? + let info = self + .data + .binary_search_by(|gi| gi.name.cmp(name)) + .ok() + .map(|i| &self.data[i]) + .or_else(|| { + codepoints + .into_iter() + .flat_map(|cps| cps.iter()) + .find_map(|cp| { + self.codepoint_to_data_index + .binary_search_by(|(info_cp, _)| info_cp.cmp(cp)) + .ok() + .map(|i| &self.data[self.codepoint_to_data_index[i].1]) + }) + }); + info.map(|info| (info.category, info.subcategory, info.codepoint)) } - /// Look up info for a glyph by codepoint - fn get_by_codepoint(&self, codepoint: u32) -> Option<&GlyphInfo> { - self.unicode_map - .get(&codepoint) - .and_then(|idx| self.data.get(*idx as usize)) + fn contains_name(&self, name: &str) -> bool { + if let Some(overrides) = self.overrides.as_ref() { + let name: SmolStr = name.into(); + if overrides.contains_key(&name) { + return true; + } + } + self.data.binary_search_by(|gi| gi.name.cmp(name)).is_ok() } // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L199 - fn construct_category(&self, name: &str) -> Option<(Category, Subcategory)> { + fn construct_category( + &self, + name: &str, + ) -> Option<(Category, Option, Option)> { + // TODO: python does production name here. + // see https://github.com/googlefonts/fontc/issues/780 + // in glyphs.app '_' prefix means "no export" if name.starts_with('_') { return None; @@ -150,30 +384,30 @@ impl GlyphData { .split_glyph_suffix(name) .map(|(base, _)| base) .unwrap_or(name); - if let Some(info) = self.get_by_name(base_name) { - return Some((info.category, info.subcategory)); + if let Some(info) = self.query_no_synthesis(base_name, None) { + return Some(info); } if let Some(base_names) = self.split_ligature_glyph_name(base_name) { let base_names_attributes: Vec<_> = base_names .iter() - .map(|name| self.get_by_name(name)) + .filter_map(|name| self.query_no_synthesis(name, None)) .collect(); - if let Some(first_attr) = base_names_attributes.first().and_then(Option::as_ref) { + if let Some(first_attr) = base_names_attributes.first() { // if first is mark, we're a mark - if first_attr.category == Category::Mark { - return Some((Category::Mark, first_attr.subcategory)); - } else if first_attr.category == Category::Letter { + if first_attr.0 == Category::Mark { + return Some((Category::Mark, first_attr.1, None)); + } else if first_attr.0 == Category::Letter { // if first is letter and rest are marks/separators, we use info from first if base_names_attributes .iter() .skip(1) - .filter_map(|attr| attr.map(|attr| attr.category)) + .map(|(cat, ..)| cat) .all(|cat| matches!(cat, Category::Mark | Category::Separator)) { - return Some((first_attr.category, first_attr.subcategory)); + return Some((first_attr.0, first_attr.1, None)); } else { - return Some((Category::Letter, Subcategory::Ligature)); + return Some((Category::Letter, Some(Subcategory::Ligature), None)); } } } @@ -185,7 +419,9 @@ impl GlyphData { // this doesn't need a &self param, but we want it locally close to the // code that calls it, so we'll make it a type method :shrug: - fn construct_category_via_agl(base_name: &str) -> Option<(Category, Subcategory)> { + fn construct_category_via_agl( + base_name: &str, + ) -> Option<(Category, Option, Option)> { if let Some(first_char) = fontdrasil::agl::glyph_name_to_unicode(base_name) .chars() .next() @@ -195,15 +431,15 @@ impl GlyphData { // Exception: Something like "one_two" should be a (_, Ligature), // "acutecomb_brevecomb" should however stay (Mark, Nonspacing). if base_name.contains('_') && category != Category::Mark { - return Some((category, Subcategory::Ligature)); + return Some((category, Some(Subcategory::Ligature), None)); } else { - return Some((category, subcategory)); + return Some((category, subcategory, None)); } } None } - fn split_glyph_suffix<'a>(&self, name: &'a str) -> Option<(&'a str, &'a str)> { + fn split_glyph_suffix<'n>(&self, name: &'n str) -> Option<(&'n str, &'n str)> { let multi_suffix = name.bytes().filter(|b| *b == b'.').count() > 1; if multi_suffix { // with multiple suffixes, try adding them one at a time and seeing if @@ -217,7 +453,7 @@ impl GlyphData { .skip(1) { let (base, suffix) = name.split_at(idx); - if self.get_by_name(base).is_some() { + if self.contains_name(base) { return Some((base, suffix)); } } @@ -260,7 +496,7 @@ impl GlyphData { let new_part = smol_str::format_smolstr!("{part}-{script}"); // if non-suffixed exists but suffixed doesn't, keep non-suffixed - if self.get_by_name(part.as_ref()).is_some() && self.get_by_name(&new_part).is_none() { + if self.contains_name(part.as_ref()) && !self.contains_name(&new_part) { continue; } *part = new_part; @@ -270,212 +506,201 @@ impl GlyphData { } // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L261 -fn category_from_icu(c: char) -> (Category, Subcategory) { +fn category_from_icu(c: char) -> (Category, Option) { match icu_properties::maps::general_category().get(c) { - GeneralCategory::Unassigned | GeneralCategory::OtherSymbol => { - (Category::Symbol, Subcategory::None) - } + GeneralCategory::Unassigned | GeneralCategory::OtherSymbol => (Category::Symbol, None), GeneralCategory::UppercaseLetter | GeneralCategory::LowercaseLetter | GeneralCategory::TitlecaseLetter - | GeneralCategory::OtherLetter => (Category::Letter, Subcategory::None), - GeneralCategory::ModifierLetter => (Category::Letter, Subcategory::Modifier), - GeneralCategory::NonspacingMark => (Category::Mark, Subcategory::Nonspacing), - GeneralCategory::SpacingMark => (Category::Mark, Subcategory::SpacingCombining), - GeneralCategory::EnclosingMark => (Category::Mark, Subcategory::Enclosing), + | GeneralCategory::OtherLetter => (Category::Letter, None), + GeneralCategory::ModifierLetter => (Category::Letter, Some(Subcategory::Modifier)), + GeneralCategory::NonspacingMark => (Category::Mark, Some(Subcategory::Nonspacing)), + GeneralCategory::SpacingMark => (Category::Mark, Some(Subcategory::SpacingCombining)), + GeneralCategory::EnclosingMark => (Category::Mark, Some(Subcategory::Enclosing)), GeneralCategory::DecimalNumber | GeneralCategory::OtherNumber => { - (Category::Number, Subcategory::DecimalDigit) + (Category::Number, Some(Subcategory::DecimalDigit)) } - GeneralCategory::LetterNumber => (Category::Number, Subcategory::None), - GeneralCategory::SpaceSeparator => (Category::Separator, Subcategory::Space), + GeneralCategory::LetterNumber => (Category::Number, None), + GeneralCategory::SpaceSeparator => (Category::Separator, Some(Subcategory::Space)), GeneralCategory::LineSeparator | GeneralCategory::ParagraphSeparator - | GeneralCategory::Control => (Category::Separator, Subcategory::None), - GeneralCategory::Format => (Category::Separator, Subcategory::Format), - GeneralCategory::PrivateUse => (Category::Letter, Subcategory::Compatibility), - GeneralCategory::DashPunctuation => (Category::Punctuation, Subcategory::Dash), + | GeneralCategory::Control => (Category::Separator, None), + GeneralCategory::Format => (Category::Separator, Some(Subcategory::Format)), + GeneralCategory::PrivateUse => (Category::Letter, Some(Subcategory::Compatibility)), + GeneralCategory::DashPunctuation => (Category::Punctuation, Some(Subcategory::Dash)), GeneralCategory::OpenPunctuation | GeneralCategory::ClosePunctuation => { - (Category::Punctuation, Subcategory::Parenthesis) + (Category::Punctuation, Some(Subcategory::Parenthesis)) } GeneralCategory::ConnectorPunctuation | GeneralCategory::OtherPunctuation => { - (Category::Punctuation, Subcategory::None) + (Category::Punctuation, None) } GeneralCategory::InitialPunctuation | GeneralCategory::FinalPunctuation => { - (Category::Punctuation, Subcategory::Quote) + (Category::Punctuation, Some(Subcategory::Quote)) } - GeneralCategory::MathSymbol => (Category::Symbol, Subcategory::Math), - GeneralCategory::CurrencySymbol => (Category::Symbol, Subcategory::Currency), - GeneralCategory::ModifierSymbol => (Category::Mark, Subcategory::Spacing), + GeneralCategory::MathSymbol => (Category::Symbol, Some(Subcategory::Math)), + GeneralCategory::CurrencySymbol => (Category::Symbol, Some(Subcategory::Currency)), + GeneralCategory::ModifierSymbol => (Category::Mark, Some(Subcategory::Spacing)), GeneralCategory::Surrogate => unreachable!("char cannot represent surrogate code points"), } } -fn load_bundled_data() -> Vec { - bincode::deserialize(BUNDLED_DATA).unwrap() -} - -fn merge_data(mut base: Vec, overrides: Vec) -> Vec { - let skip_names = overrides - .iter() - .map(|info| &info.name) - .collect::>(); - base.retain(|info| !skip_names.contains(&info.name)); - base.extend(overrides); - base -} - #[cfg(test)] mod tests { - use std::sync::OnceLock; use super::*; #[test] fn test_bundled_data() { - let data = load_bundled_data(); - assert_eq!(data.len(), 73329); + let data = GlyphData::glyphs_lib_data().data; + assert!(data.len() > 70000, "{}", data.len()); } #[test] fn simple_overrides() { - let overrides = vec![GlyphInfo { - name: "A".into(), - category: Category::Mark, - subcategory: Subcategory::SpacingCombining, - unicode: Some(b'A' as u32), - production: None, - alt_names: Default::default(), - }]; - let bundled = load_bundled_data(); - let merged = merge_data(bundled, overrides); - let data = GlyphData::new_impl(merged); - - assert_eq!(data.get_by_name("A").unwrap().category, Category::Mark); + let overrides = HashMap::from([( + "A".into(), + GlyphOverride { + category: Category::Mark, + subcategory: Some(Subcategory::SpacingCombining), + codepoint: Some(b'A' as u32), + }, + )]); + let data = GlyphData::with_overrides(overrides).unwrap(); + + assert_eq!(data.query("A", None).unwrap().0, Category::Mark); } #[test] fn overrides_from_file() { - let data = GlyphData::new(Some(Path::new("./data/GlyphData_override_test.xml"))).unwrap(); - assert_eq!(data.get_by_name("zero").unwrap().category, Category::Other); - assert_eq!(data.get_by_name("C").unwrap().category, Category::Number); - assert_eq!( - data.get_by_name("Yogh").unwrap().production, - Some("Yolo".into()) - ); + let data = + GlyphData::with_override_file(Path::new("./data/GlyphData_override_test.xml")).unwrap(); + assert_eq!(data.query("zero", None).unwrap().0, Category::Other); + assert_eq!(data.query("C", None).unwrap().0, Category::Number); } - fn get_category(name: &str, codepoints: &[u32]) -> Option<(Category, Subcategory)> { - static GLYPH_DATA: OnceLock = OnceLock::new(); - let data = GLYPH_DATA.get_or_init(|| GlyphData::new(None).unwrap()); + fn get_category(name: &str, codepoints: &[u32]) -> Option<(Category, Option)> { let codepoints = codepoints.iter().copied().collect(); - data.get_glyph(name, Some(&codepoints)) - .map(|info| (info.category, info.subcategory)) + GlyphData::glyphs_lib_data() + .query(name, Some(&codepoints)) + .map(|(cat, sub, _)| (cat, sub)) } // from python glyphsLib: https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d5/tests/glyphdata_test.py#L106 #[test] fn py_test_category() { for (name, expected) in [ - (".notdef", Some((Category::Separator, Subcategory::None))), + (".notdef", Some((Category::Separator, None))), // this test case requires AGL lookup: - ("uni000D", Some((Category::Separator, Subcategory::None))), + ("uni000D", Some((Category::Separator, None))), ( "boxHeavyUp", - Some((Category::Symbol, Subcategory::Geometry)), + Some((Category::Symbol, Some(Subcategory::Geometry))), + ), + ("eacute", Some((Category::Letter, None))), + ("Abreveacute", Some((Category::Letter, None))), + ("C-fraktur", Some((Category::Letter, None))), + ("fi", Some((Category::Letter, Some(Subcategory::Ligature)))), + ( + "fi.alt", + Some((Category::Letter, Some(Subcategory::Ligature))), + ), + ( + "hib-ko", + Some((Category::Letter, Some(Subcategory::Syllable))), ), - ("eacute", Some((Category::Letter, Subcategory::None))), - ("Abreveacute", Some((Category::Letter, Subcategory::None))), - ("C-fraktur", Some((Category::Letter, Subcategory::None))), - ("fi", Some((Category::Letter, Subcategory::Ligature))), - ("fi.alt", Some((Category::Letter, Subcategory::Ligature))), - ("hib-ko", Some((Category::Letter, Subcategory::Syllable))), ( "one.foo", - Some((Category::Number, Subcategory::DecimalDigit)), + Some((Category::Number, Some(Subcategory::DecimalDigit))), ), ( "one_two.foo", - Some((Category::Number, Subcategory::Ligature)), + Some((Category::Number, Some(Subcategory::Ligature))), + ), + ( + "o_f_f_i", + Some((Category::Letter, Some(Subcategory::Ligature))), ), - ("o_f_f_i", Some((Category::Letter, Subcategory::Ligature))), ( "o_f_f_i.foo", - Some((Category::Letter, Subcategory::Ligature)), + Some((Category::Letter, Some(Subcategory::Ligature))), ), ( "ain_alefMaksura-ar.fina", - Some((Category::Letter, Subcategory::Ligature)), + Some((Category::Letter, Some(Subcategory::Ligature))), + ), + ( + "brevecomb", + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), - ("brevecomb", Some((Category::Mark, Subcategory::Nonspacing))), ( "brevecomb.case", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "brevecomb_acutecomb", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "brevecomb_acutecomb.case", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "caroncomb_dotaccentcomb", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "dieresiscomb_caroncomb", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "dieresiscomb_macroncomb", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "dotaccentcomb_macroncomb", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "macroncomb_dieresiscomb", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "dotaccentcomb_o", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "macronlowmod_O", - Some((Category::Mark, Subcategory::Modifier)), + Some((Category::Mark, Some(Subcategory::Modifier))), ), - ("O_o", Some((Category::Letter, Subcategory::Ligature))), + ("O_o", Some((Category::Letter, Some(Subcategory::Ligature)))), ( "O_dotaccentcomb_o", - Some((Category::Letter, Subcategory::Ligature)), + Some((Category::Letter, Some(Subcategory::Ligature))), ), + ("O_dotaccentcomb", Some((Category::Letter, None))), ( - "O_dotaccentcomb", - Some((Category::Letter, Subcategory::None)), + "O_period", + Some((Category::Letter, Some(Subcategory::Ligature))), ), - ("O_period", Some((Category::Letter, Subcategory::Ligature))), - ("O_nbspace", Some((Category::Letter, Subcategory::None))), + ("O_nbspace", Some((Category::Letter, None))), ("_a", None), ("_aaa", None), ( "dal_alef-ar", - Some((Category::Letter, Subcategory::Ligature)), + Some((Category::Letter, Some(Subcategory::Ligature))), ), ( "dal_lam-ar.dlig", - Some((Category::Letter, Subcategory::Ligature)), + Some((Category::Letter, Some(Subcategory::Ligature))), ), - ("po-khmer", Some((Category::Letter, Subcategory::None))), + ("po-khmer", Some((Category::Letter, None))), ( "po-khmer.below", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ( "po-khmer.below.ro", - Some((Category::Mark, Subcategory::Nonspacing)), + Some((Category::Mark, Some(Subcategory::Nonspacing))), ), ] { let result = get_category(name, &[]); @@ -486,9 +711,13 @@ mod tests { // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L145C5-L153C76 #[test] fn py_category_by_unicode() { - //# "SignU.bn" is a non-standard name not defined in GlyphData.xml + // "SignU.bn" is a non-standard name not defined in GlyphData.xml + // 0x09C1 should match let result = get_category("SignU.bn", &[0x09C1]); - assert_eq!(result, Some((Category::Mark, Subcategory::Nonspacing))) + assert_eq!( + result, + Some((Category::Mark, Some(Subcategory::Nonspacing))) + ) } // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L155C5-L162C1 @@ -496,8 +725,8 @@ mod tests { #[test] fn py_bug_232() { let u = get_category("uni07F0", &[]); - assert_eq!(u, Some((Category::Mark, Subcategory::Nonspacing))); + assert_eq!(u, Some((Category::Mark, Some(Subcategory::Nonspacing)))); let g = get_category("longlowtonecomb-nko", &[]); - assert_eq!(g, Some((Category::Mark, Subcategory::Nonspacing))); + assert_eq!(g, Some((Category::Mark, Some(Subcategory::Nonspacing)))); } } diff --git a/glyphs-reader/src/glyphdata/glyphdata_impl.rs b/glyphs-reader/src/glyphdata/glyphdata_impl.rs deleted file mode 100644 index a12ca7c8f..000000000 --- a/glyphs-reader/src/glyphdata/glyphdata_impl.rs +++ /dev/null @@ -1,332 +0,0 @@ -// NOTE: to avoid a bunch of duplication, this file is also `include!`ed from -// build.rs. - -use std::{fmt::Display, num::ParseIntError, path::PathBuf, str::FromStr}; - -use quick_xml::{ - events::{BytesStart, Event}, - Reader, -}; -use serde::{Deserialize, Serialize}; -use smol_str::SmolStr; - -/// Information about a glyph -/// -/// In general this is derived from bundled data files, but these fields can -/// also be overridden by the font author -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct GlyphInfo { - pub name: SmolStr, - pub category: Category, - pub subcategory: Subcategory, - pub unicode: Option, - pub production: Option, - pub alt_names: Vec, -} - -/// The primary category for a given glyph -/// -/// These categories are not the same as the unicode character categories. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] -#[repr(u8)] -pub enum Category { - Mark, - Space, - Separator, - Letter, - Number, - Symbol, - Punctuation, - Other, -} - -/// The subcategory of a given glyph -#[derive( - Clone, Copy, Default, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, -)] -#[repr(u8)] -pub enum Subcategory { - Spacing, - Radical, - Math, - Superscript, - Geometry, - Dash, - DecimalDigit, - Currency, - Fraction, - Halfform, - Small, - Number, - Quote, - Space, - Letter, - Jamo, - Format, - Parenthesis, - Matra, - Arrow, - Nonspacing, - Compatibility, - Syllable, - Ligature, - Modifier, - SpacingCombining, - Emoji, - Enclosing, - #[default] - None, -} - -/// Parse glyph info entries out of a GlyphData xml file. -pub fn parse_entries(xml: &[u8]) -> Result, GlyphDataError> { - fn check_and_advance_past_preamble(reader: &mut Reader<&[u8]>) -> Result<(), GlyphDataError> { - loop { - let event = reader.read_event()?; - match event { - Event::Comment(_) => (), - Event::Decl(_) => (), - Event::DocType(_) => (), - Event::Start(start) if start.name().as_ref() == b"glyphData" => return Ok(()), - _other => { - return Err(GlyphDataError::WrongFirstElement); - } - } - } - } - - let mut reader = Reader::from_reader(xml); - reader.config_mut().trim_text(true); - - check_and_advance_past_preamble(&mut reader)?; - iter_rows(&mut reader) - .map(|row| row.map_err(Into::into).and_then(parse_glyph_xml)) - .collect::>() -} - -fn iter_rows<'a, 'b: 'a>( - reader: &'b mut Reader<&'a [u8]>, -) -> impl Iterator, quick_xml::Error>> + 'a { - std::iter::from_fn(|| match reader.read_event() { - Err(e) => Some(Err(e)), - Ok(Event::Empty(start)) => Some(Ok(start)), - _ => None, - }) -} - -fn parse_glyph_xml(item: BytesStart) -> Result { - let mut name = None; - let mut category = None; - let mut subcategory = None; - let mut unicode = None; - let mut production = None; - let mut alt_names = None; - - for attr in item.attributes() { - let attr = attr?; - let value = attr.unescape_value()?; - match attr.key.as_ref() { - b"name" => name = Some(value), - b"category" => category = Some(value), - b"subCategory" => subcategory = Some(value), - b"unicode" => unicode = Some(value), - b"production" => production = Some(value), - b"altNames" => alt_names = Some(value), - b"unicodeLegacy" | b"case" | b"direction" | b"script" | b"description" => (), - other => { - return Err(GlyphDataError::UnknownAttribute( - String::from_utf8_lossy(other).into_owned(), - )) - } - } - } - - // now we've found some values, let's finalize them - - let name = name - .map(SmolStr::new) - .ok_or_else(|| GlyphDataError::missing_attr("name", item.attributes_raw()))?; - let category = category - .ok_or_else(|| GlyphDataError::missing_attr("category", item.attributes_raw())) - .and_then(|cat| { - Category::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidCategory) - })?; - let subcategory = subcategory - .map(|cat| Subcategory::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidSubcategory)) - .transpose()? - .unwrap_or(Subcategory::None); - let production = production.map(SmolStr::new); - let unicode = unicode - .map(|s| { - u32::from_str_radix(&s, 16).map_err(|inner| GlyphDataError::InvalidUnicode { - raw: s.into_owned(), - inner, - }) - }) - .transpose()?; - let alt_names = alt_names - .map(|names| { - names - .as_ref() - .split(',') - .map(|name| SmolStr::from(name.trim())) - .collect() - }) - .unwrap_or_default(); - - Ok(GlyphInfo { - name, - category, - subcategory, - unicode, - production, - alt_names, - }) -} - -impl FromStr for Category { - type Err = SmolStr; - - fn from_str(s: &str) -> Result { - match s { - "Mark" => Ok(Self::Mark), - "Space" => Ok(Self::Space), - "Separator" => Ok(Self::Separator), - "Letter" => Ok(Self::Letter), - "Number" => Ok(Self::Number), - "Symbol" => Ok(Self::Symbol), - "Punctuation" => Ok(Self::Punctuation), - "Other" => Ok(Self::Other), - _ => Err(s.into()), - } - } -} - -impl FromStr for Subcategory { - type Err = SmolStr; - - fn from_str(s: &str) -> Result { - match s { - "Spacing" => Ok(Self::Spacing), - "Radical" => Ok(Self::Radical), - "Math" => Ok(Self::Math), - "Superscript" => Ok(Self::Superscript), - "Geometry" => Ok(Self::Geometry), - "Dash" => Ok(Self::Dash), - "Decimal Digit" => Ok(Self::DecimalDigit), - "Currency" => Ok(Self::Currency), - "Fraction" => Ok(Self::Fraction), - "Halfform" => Ok(Self::Halfform), - "Small" => Ok(Self::Small), - "Number" => Ok(Self::Number), - "Quote" => Ok(Self::Quote), - "Space" => Ok(Self::Space), - "Letter" => Ok(Self::Letter), - "Jamo" => Ok(Self::Jamo), - "Format" => Ok(Self::Format), - "Parenthesis" => Ok(Self::Parenthesis), - "Matra" => Ok(Self::Matra), - "Arrow" => Ok(Self::Arrow), - "Nonspacing" => Ok(Self::Nonspacing), - "Compatibility" => Ok(Self::Compatibility), - "Syllable" => Ok(Self::Syllable), - "Ligature" => Ok(Self::Ligature), - "Modifier" => Ok(Self::Modifier), - "Spacing Combining" => Ok(Self::SpacingCombining), - "Emoji" => Ok(Self::Emoji), - "Enclosing" => Ok(Self::Enclosing), - "None" => Ok(Self::None), - _ => Err(s.into()), - } - } -} - -impl Display for Category { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Mark => write!(f, "Mark"), - Self::Space => write!(f, "Space"), - Self::Separator => write!(f, "Separator"), - Self::Letter => write!(f, "Letter"), - Self::Number => write!(f, "Number"), - Self::Symbol => write!(f, "Symbol"), - Self::Punctuation => write!(f, "Punctuation"), - Self::Other => write!(f, "Other"), - } - } -} - -impl Display for Subcategory { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Spacing => write!(f, "Spacing"), - Self::Radical => write!(f, "Radical"), - Self::Math => write!(f, "Math"), - Self::Superscript => write!(f, "Superscript"), - Self::Geometry => write!(f, "Geometry"), - Self::Dash => write!(f, "Dash"), - Self::DecimalDigit => write!(f, "Decimal Digit"), - Self::Currency => write!(f, "Currency"), - Self::Fraction => write!(f, "Fraction"), - Self::Halfform => write!(f, "Halfform"), - Self::Small => write!(f, "Small"), - Self::Number => write!(f, "Number"), - Self::Quote => write!(f, "Quote"), - Self::Space => write!(f, "Space"), - Self::Letter => write!(f, "Letter"), - Self::Jamo => write!(f, "Jamo"), - Self::Format => write!(f, "Format"), - Self::Parenthesis => write!(f, "Parenthesis"), - Self::Matra => write!(f, "Matra"), - Self::Arrow => write!(f, "Arrow"), - Self::Nonspacing => write!(f, "Nonspacing"), - Self::Compatibility => write!(f, "Compatibility"), - Self::Syllable => write!(f, "Syllable"), - Self::Ligature => write!(f, "Ligature"), - Self::Modifier => write!(f, "Modifier"), - Self::SpacingCombining => write!(f, "Spacing Combining"), - Self::Emoji => write!(f, "Emoji"), - Self::Enclosing => write!(f, "Enclosing"), - Self::None => write!(f, "None"), - } - } -} - -#[derive(Clone, Debug, thiserror::Error)] -pub enum GlyphDataError { - #[error("Couldn't read user file at '{path}': '{reason}'")] - UserFile { - path: PathBuf, - reason: std::io::ErrorKind, - }, - #[error("Error parsing XML: '{0}'")] - ReaderError(#[from] quick_xml::Error), - #[error("Error parsing XML attribute: '{0}'")] - XmlAttributeError(#[from] quick_xml::events::attributes::AttrError), - #[error("Unknown category '{0}'")] - InvalidCategory(SmolStr), - #[error("Unknown subcategory '{0}'")] - InvalidSubcategory(SmolStr), - #[error("the XML input did not start with a tag")] - WrongFirstElement, - #[error("Missing required attribute '{missing}' in '{attributes}'")] - MissingRequiredAttribute { - attributes: String, - missing: &'static str, - }, - #[error("Invalid unicode value '{raw}': '{inner}'")] - InvalidUnicode { raw: String, inner: ParseIntError }, - #[error("Unexpected attribute '{0}'")] - UnknownAttribute(String), -} - -impl GlyphDataError { - // a little helper here makes our parsing code cleaner - fn missing_attr(name: &'static str, raw_attrs: &[u8]) -> Self { - let attributes = String::from_utf8_lossy(raw_attrs).into_owned(); - Self::MissingRequiredAttribute { - attributes, - missing: name, - } - } -} diff --git a/glyphs-reader/src/lib.rs b/glyphs-reader/src/lib.rs index 3573848d7..c57f620b1 100644 --- a/glyphs-reader/src/lib.rs +++ b/glyphs-reader/src/lib.rs @@ -3,6 +3,7 @@ pub mod error; mod font; pub mod glyphdata; +mod glyphslib_data; mod plist; mod propagate_anchors; @@ -10,4 +11,5 @@ pub use font::{ Axis, Component, FeatureSnippet, Font, FontMaster, Glyph, InstanceType, Layer, Node, NodeType, Path, Shape, }; +pub use glyphslib_data::{Category, Subcategory}; pub use plist::Plist; diff --git a/glyphs-reader/src/propagate_anchors.rs b/glyphs-reader/src/propagate_anchors.rs index 918d14555..bbed4db98 100644 --- a/glyphs-reader/src/propagate_anchors.rs +++ b/glyphs-reader/src/propagate_anchors.rs @@ -11,11 +11,7 @@ use indexmap::IndexMap; use kurbo::{Affine, Vec2}; use smol_str::{format_smolstr, SmolStr}; -use crate::{ - font::Anchor, - glyphdata::{Category, Subcategory}, - Component, Font, Glyph, Layer, Shape, -}; +use crate::{font::Anchor, Category, Component, Font, Glyph, Layer, Shape, Subcategory}; impl Font { /// Copy anchors from component glyphs into their including composites @@ -110,7 +106,7 @@ fn anchors_traversing_components<'a>( return origin_adjusted_anchors(&layer.anchors).collect(); } - let is_ligature = glyph.sub_category == Subcategory::Ligature; + let is_ligature = glyph.sub_category == Some(Subcategory::Ligature); let mut has_underscore = layer .anchors .iter() @@ -446,6 +442,8 @@ fn depth_sorted_composite_glyphs(glyphs: &BTreeMap) -> Vec Self { - let info = GlyphData::bundled().get_glyph(name, None); let mut this = GlyphBuilder(Glyph { name: name.into(), export: true, - category: info.as_ref().map(|i| i.category), - sub_category: info.as_ref().map(|i| i.subcategory).unwrap_or_default(), - - unicode: info.and_then(|i| i.unicode).into_iter().collect(), ..Default::default() }); + if let Some((category, sub_category, unicode)) = + GlyphData::glyphs_lib_data().query(name, None) + { + this.set_category(category); + if let Some(sub_category) = sub_category { + this.set_subcategory(sub_category); + } + if let Some(unicode) = unicode { + this.set_unicode(unicode); + } + } this.add_layer(); this } @@ -508,14 +512,18 @@ mod tests { self.0.layers.last_mut().unwrap() } - #[allow(dead_code)] + fn set_unicode(&mut self, unicode: u32) -> &mut Self { + self.0.unicode = BTreeSet::from([unicode]); + self + } + fn set_category(&mut self, category: Category) -> &mut Self { self.0.category = Some(category); self } fn set_subcategory(&mut self, sub_category: Subcategory) -> &mut Self { - self.0.sub_category = sub_category; + self.0.sub_category = Some(sub_category); self } diff --git a/glyphs2fontir/src/source.rs b/glyphs2fontir/src/source.rs index 62031118c..4e0d718c2 100644 --- a/glyphs2fontir/src/source.rs +++ b/glyphs2fontir/src/source.rs @@ -25,10 +25,7 @@ use fontir::{ source::{Input, Source}, stateset::StateSet, }; -use glyphs_reader::{ - glyphdata::{Category, Subcategory}, - Font, InstanceType, -}; +use glyphs_reader::{Category, Font, InstanceType, Subcategory}; use ordered_float::OrderedFloat; use smol_str::SmolStr; use write_fonts::{ @@ -548,10 +545,11 @@ fn category_for_glyph(glyph: &glyphs_reader::Glyph) -> Option { // 'attaching anchor'; see https://github.com/googlefonts/glyphsLib/issues/1024 .any(|anchor| !anchor.name.starts_with('_')); match (glyph.category, glyph.sub_category) { - (_, Subcategory::Ligature) if has_attaching_anchor => Some(GlyphClassDef::Ligature), - (Some(Category::Mark), Subcategory::Nonspacing | Subcategory::SpacingCombining) => { - Some(GlyphClassDef::Mark) - } + (_, Some(Subcategory::Ligature)) if has_attaching_anchor => Some(GlyphClassDef::Ligature), + ( + Some(Category::Mark), + Some(Subcategory::Nonspacing) | Some(Subcategory::SpacingCombining), + ) => Some(GlyphClassDef::Mark), _ if has_attaching_anchor => Some(GlyphClassDef::Base), _ => None, } @@ -1132,7 +1130,7 @@ mod tests { source::Source, stateset::StateSet, }; - use glyphs_reader::{glyphdata::Category, Font}; + use glyphs_reader::{Category, Font}; use indexmap::IndexSet; use ir::{test_helpers::Round2, Panose}; use write_fonts::types::{NameId, Tag};