Skip to content

Commit

Permalink
Loading glyphslib derived data is showing up in profiles, try to make…
Browse files Browse the repository at this point in the history
… it not do so
  • Loading branch information
rsheeter committed Oct 24, 2024
1 parent 081b604 commit 143c8d1
Show file tree
Hide file tree
Showing 9 changed files with 672 additions and 663 deletions.
7 changes: 0 additions & 7 deletions glyphs-reader/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,3 @@ bincode.workspace = true
[dev-dependencies]
pretty_assertions.workspace = true
rstest.workspace = true

[build-dependencies]
quick-xml = "0.36"
smol_str.workspace = true
serde.workspace = true
thiserror.workspace = true
bincode.workspace = true
34 changes: 0 additions & 34 deletions glyphs-reader/build.rs

This file was deleted.

199 changes: 172 additions & 27 deletions glyphs-reader/data/update.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,188 @@
"""Update bundled xml files
"""Update bundled data derived from glyphsLib GlyphData.xml and GlyphData_Ideographs.xml.
We try to match the behaviour of the python toolchain, so we want to ship the
same data files as are currently bundled in glyphsLib. This script copies those
files out of the currently active version of glyphsLib.
This script copies files out of the currently active version of glyphsLib and generates
Rust code for efficient access to the default data. Override files must be loaded separately
from XML. We only generate code for the fields we actively use.
Usage:
python data/update.py
python glyphs-reader/data/update.py
"""

import dataclasses
from dataclasses import dataclass
import glyphsLib
from importlib import resources
import os
import shutil
from io import StringIO
from lxml import etree
from pathlib import Path
from textwrap import dedent
from typing import Optional, Tuple

def script_dir():
return os.path.dirname(os.path.abspath(__file__))

def get_data_file(filepath):
return resources.files(glyphsLib).joinpath("data").joinpath(filepath)
@dataclass(frozen=True)
class GlyphInfo:
codepoint: Optional[int]
name: str
category: str
subcategory: Optional[str]


def copy_data_files():
target_dir = script_dir()
for target in ["GlyphData.xml", "GlyphData_Ideographs.xml"]:
file = get_data_file(target)
target = os.path.join(target_dir, target)
with file.open("rb") as source, open(target, "wb") as dest:
shutil.copyfileobj(source, dest)
def codename(name: Optional[str]) -> Optional[str]:
if name is None:
return None
return name.replace(" ", "")

def write_version_file():
version = glyphsLib.__version__
with open(os.path.join(script_dir(), 'VERSION'), 'w') as f:
f.write(f"XML files copied from glyphsLib version {version}.\n"
"(this file generated by update.py)\n")

def main(_):
copy_data_files()
write_version_file()
def read_glyph_info(file: str) -> Tuple[GlyphInfo]:
file = resources.files(glyphsLib).joinpath("data").joinpath(file)
with open(file) as f:
tree = etree.parse(f)

by_name = {}

# Do a full pass to collect names
for e in tree.xpath("//glyph"):
info = GlyphInfo(
e.attrib.get("unicode", None),
e.attrib["name"],
codename(e.attrib["category"]),
codename(e.attrib.get("subCategory", None)),
)
if info.name not in by_name:
by_name[info.name] = info
else:
print(f"We've already seen {info.name}!")

# Then add alt_names where they don't overlap names
for e in tree.xpath("//glyph[@altNames]"):
for alt_name in e.attrib["altNames"].split(","):
if alt_name in by_name:
print(f'Ignoring alt name "{alt_name}", already taken')
continue
by_name[alt_name] = dataclasses.replace(
by_name[e.attrib["name"]], name=alt_name, codepoint=None
)

return tuple(by_name.values())


def main():
glyph_infos = sorted(
set(read_glyph_info("GlyphData.xml"))
| set(read_glyph_info("GlyphData_Ideographs.xml")),
key=lambda g: g.name,
)
names = {g.name for g in glyph_infos}
categories = {g.category for g in glyph_infos}
subcategories = {g.subcategory for g in glyph_infos if g.subcategory is not None}
assert len(names) == len(glyph_infos), "Names aren't unique?"
codepoints = {}
for i, gi in enumerate(glyph_infos):
if gi.codepoint is None:
continue
codepoint = int(gi.codepoint, 16)
if codepoint not in codepoints:
codepoints[codepoint] = i
else:
print(
f"Multiple names are assigned 0x{codepoint:04x}, using the first one we saw"
)

dest_file = Path(__file__).parent.parent / "src" / "glyphslib_data.rs"

with open(dest_file, "w") as f:
f.write(
f"//! Glyph data generated from glyphsLib {glyphsLib.__version__} by {Path(__file__).name}\n"
)
f.write("//!\n")
f.write(f"//! {len(glyph_infos)} glyph metadata records taken from glyphsLib\n")

f.write(
dedent(
"""
use std::str::FromStr;
use smol_str::SmolStr;
use crate::glyphdata::GlyphInfo;
/// The primary category for a given glyph
///
/// Generated to ensure it matches the glyphsLib dataset.
///
/// These categories are not the same as the unicode character categories.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Category {
"""
)
)
for category in sorted(categories):
f.write(f" {category},\n")
f.write("}\n")

f.write("impl FromStr for Category {\n")
f.write(" type Err = SmolStr;\n\n")
f.write(" fn from_str(s: &str) -> Result<Self, Self::Err> {\n")
f.write(" match s {\n")
for category in sorted(categories):
f.write(f' "{category}" => Ok(Self::{category}),\n')
f.write(f" _ => Err(s.into()),\n")
f.write(" }\n")
f.write(" }\n")
f.write("}\n")
f.write("\n")

f.write(
dedent(
"""
/// The secondary category for a given glyph
///
/// Generated to ensure it matches the glyphsLib dataset.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Subcategory {
"""
)
)
for subcategory in sorted(subcategories):
f.write(f" {subcategory},\n")
f.write("}\n\n")

f.write("impl FromStr for Subcategory {\n")
f.write(" type Err = SmolStr;\n\n")
f.write(" fn from_str(s: &str) -> Result<Self, Self::Err> {\n")
f.write(" match s {\n")
for subcategory in sorted(subcategories):
f.write(f' "{subcategory}" => Ok(Subcategory::{subcategory}),\n')
f.write(f" _ => Err(s.into()),\n")
f.write(" }\n")
f.write(" }\n")
f.write("}\n")
f.write("\n")

f.write("// Sorted by name, has unique names, therefore safe to bsearch\n")
f.write("pub(crate) const GLYPH_INFO: &[GlyphInfo] = &[\n")
for gi in glyph_infos:
codepoint = "None"
if gi.codepoint is not None:
codepoint = f"Some(0x{gi.codepoint})"
subcategory = "None"
if gi.subcategory is not None:
subcategory = f"Some(Subcategory::{gi.subcategory})"
f.write(
f' GlyphInfo::new("{gi.name}", Category::{gi.category}, {subcategory}, {codepoint}),\n'
)

f.write("];\n")

f.write(
"// Sorted by codepoint, has unique codepoints, therefore safe to bsearch\n"
)
f.write("pub(crate) const CODEPOINT_TO_INFO_IDX: &[(u32, usize)] = &[\n")
for codepoint, i in sorted(codepoints.items()):
f.write(f" (0x{codepoint:04x}, {i}), // {glyph_infos[i].name}\n")

f.write("];\n")


if __name__ == "__main__":
main(None)
main()
46 changes: 23 additions & 23 deletions glyphs-reader/src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use std::hash::Hash;
use std::str::FromStr;
use std::{fs, path};

use crate::glyphdata::{Category, GlyphData, Subcategory};
use crate::glyphdata::GlyphData;
use crate::{Category, Subcategory};
use ascii_plist_derive::FromPlist;
use fontdrasil::types::WidthClass;
use kurbo::{Affine, Point, Vec2};
Expand Down Expand Up @@ -205,7 +206,7 @@ pub struct Glyph {
/// The right kerning group
pub right_kern: Option<SmolStr>,
pub category: Option<Category>,
pub sub_category: Subcategory,
pub sub_category: Option<Subcategory>,
}

impl Glyph {
Expand All @@ -214,7 +215,7 @@ impl Glyph {
(self.category, self.sub_category),
(
Some(Category::Mark),
Subcategory::Nonspacing | Subcategory::SpacingCombining
Some(Subcategory::Nonspacing) | Some(Subcategory::SpacingCombining)
)
)
}
Expand Down Expand Up @@ -1898,7 +1899,7 @@ impl TryFrom<RawLayer> for Layer {

impl RawGlyph {
// we pass in the radix because it depends on the version, stored in the font struct
fn build(self, codepoint_radix: u32) -> Result<Glyph, Error> {
fn build(self, codepoint_radix: u32, glyph_data: &GlyphData) -> Result<Glyph, Error> {
let mut instances = Vec::new();
for layer in self.layers {
if layer.is_draft() {
Expand Down Expand Up @@ -1933,12 +1934,12 @@ impl RawGlyph {
.unwrap_or_default();

if category.is_none() || sub_category.is_none() {
if let Some((computed_category, computed_subcategory)) =
get_glyph_category(&self.glyphname, &codepoints)
if let Some((computed_category, computed_subcategory, _)) =
glyph_data.query(&self.glyphname, Some(&codepoints))
{
// if they were manually set don't change them, otherwise do
category = category.or(Some(computed_category));
sub_category = sub_category.or(Some(computed_subcategory));
sub_category = sub_category.or(computed_subcategory);
}
}

Expand All @@ -1950,20 +1951,11 @@ impl RawGlyph {
right_kern: self.kern_right,
unicode: codepoints,
category,
sub_category: sub_category.unwrap_or_default(),
sub_category,
})
}
}

// This will eventually need to be replaced with something that can handle
// custom GlyphData.xml files, as well as handle overrides that are part of the
// glyph source.
fn get_glyph_category(name: &str, codepoints: &BTreeSet<u32>) -> Option<(Category, Subcategory)> {
GlyphData::bundled()
.get_glyph(name, Some(codepoints))
.map(|info| (info.category, info.subcategory))
}

// https://github.com/googlefonts/glyphsLib/blob/24b4d340e4c82948ba121dcfe563c1450a8e69c9/Lib/glyphsLib/builder/constants.py#L186
#[rustfmt::skip]
static GLYPHS_TO_OPENTYPE_LANGUAGE_ID: &[(&str, i32)] = &[
Expand Down Expand Up @@ -2239,6 +2231,9 @@ impl TryFrom<RawFont> for Font {
from.v2_to_v3_names()?;
}

// TODO: this should be provided in a manner that allows for overrides
let glyph_data = GlyphData::glyphs_lib_data();

let radix = if from.is_v2() { 16 } else { 10 };
let glyph_order = parse_glyph_order(&from);

Expand Down Expand Up @@ -2277,7 +2272,10 @@ impl TryFrom<RawFont> for Font {

let mut glyphs = BTreeMap::new();
for raw_glyph in from.glyphs.into_iter() {
glyphs.insert(raw_glyph.glyphname.clone(), raw_glyph.build(radix)?);
glyphs.insert(
raw_glyph.glyphname.clone(),
raw_glyph.build(radix, &glyph_data)?,
);
}

let mut features = Vec::new();
Expand Down Expand Up @@ -2615,9 +2613,9 @@ mod tests {
default_master_idx, RawAxisUserToDesignMap, RawFeature, RawFont, RawFontMaster,
RawUserToDesignMapping,
},
glyphdata::{Category, Subcategory},
glyphdata::GlyphData,
plist::FromPlist,
Font, FontMaster, Node, Shape,
Category, Font, FontMaster, Node, Shape,
};
use std::{
collections::{BTreeMap, BTreeSet, HashSet},
Expand Down Expand Up @@ -3568,9 +3566,11 @@ mod tests {
..Default::default()
};

let cooked = raw.build(16).unwrap();
assert_eq!(cooked.category, Some(Category::Letter));
assert_eq!(cooked.sub_category, Subcategory::None);
let cooked = raw.build(16, &GlyphData::glyphs_lib_data()).unwrap();
assert_eq!(
(cooked.category, cooked.sub_category),
(Some(Category::Letter), None)
);
}

#[test]
Expand Down
Loading

0 comments on commit 143c8d1

Please sign in to comment.