Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try out generating code instead of bincode for glyphsLib data to avoid runtime deserialization #1066

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions glyphs-reader/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,3 @@ bincode.workspace = true
[dev-dependencies]
pretty_assertions.workspace = true
rstest.workspace = true

[build-dependencies]
quick-xml = "0.36"
smol_str.workspace = true
serde.workspace = true
thiserror.workspace = true
bincode.workspace = true
34 changes: 0 additions & 34 deletions glyphs-reader/build.rs

This file was deleted.

33,013 changes: 0 additions & 33,013 deletions glyphs-reader/data/GlyphData.xml

This file was deleted.

40,358 changes: 0 additions & 40,358 deletions glyphs-reader/data/GlyphData_Ideographs.xml

This file was deleted.

140 changes: 113 additions & 27 deletions glyphs-reader/data/update.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,129 @@
"""Update bundled xml files
"""Update bundled data derived from glyphsLib GlyphData.xml and GlyphData_Ideographs.xml.

We try to match the behaviour of the python toolchain, so we want to ship the
same data files as are currently bundled in glyphsLib. This script copies those
files out of the currently active version of glyphsLib.
This script copies files out of the currently active version of glyphsLib and generates
Rust code for efficient access to the default data. Override files must be loaded separately
from XML. We only generate code for the fields we actively use.

Usage:
python data/update.py
python glyphs-reader/data/update.py
"""

import dataclasses
from dataclasses import dataclass
import glyphsLib
from importlib import resources
import os
import shutil
from io import StringIO
from lxml import etree
from pathlib import Path
from textwrap import dedent
from typing import Optional, Tuple

def script_dir():
return os.path.dirname(os.path.abspath(__file__))

def get_data_file(filepath):
return resources.files(glyphsLib).joinpath("data").joinpath(filepath)
@dataclass(frozen=True)
class GlyphInfo:
codepoint: Optional[int]
name: str
category: str
subcategory: Optional[str]


def copy_data_files():
target_dir = script_dir()
for target in ["GlyphData.xml", "GlyphData_Ideographs.xml"]:
file = get_data_file(target)
target = os.path.join(target_dir, target)
with file.open("rb") as source, open(target, "wb") as dest:
shutil.copyfileobj(source, dest)
def codename(name: Optional[str]) -> Optional[str]:
if name is None:
return None
return name.replace(" ", "")

def write_version_file():
version = glyphsLib.__version__
with open(os.path.join(script_dir(), 'VERSION'), 'w') as f:
f.write(f"XML files copied from glyphsLib version {version}.\n"
"(this file generated by update.py)\n")

def main(_):
copy_data_files()
write_version_file()
def read_glyph_info(file: str) -> Tuple[GlyphInfo]:
file = resources.files(glyphsLib).joinpath("data").joinpath(file)
with open(file) as f:
tree = etree.parse(f)

by_name = {}

# Do a full pass to collect names
for e in tree.xpath("//glyph"):
info = GlyphInfo(
e.attrib.get("unicode", None),
e.attrib["name"].strip(),
codename(e.attrib["category"]),
codename(e.attrib.get("subCategory", None)),
)
if info.name not in by_name:
by_name[info.name] = info
else:
print(f"We've already seen {info.name}!")

# Then add alt_names where they don't overlap names
for e in tree.xpath("//glyph[@altNames]"):
for alt_name in e.attrib["altNames"].split(","):
alt_name = alt_name.strip()
if alt_name in by_name:
print(f'Ignoring alt name "{alt_name}", already taken')
continue
by_name[alt_name] = dataclasses.replace(
by_name[e.attrib["name"]], name=alt_name, codepoint=None
)

return tuple(by_name.values())


def main():
glyph_infos = sorted(
set(read_glyph_info("GlyphData.xml"))
| set(read_glyph_info("GlyphData_Ideographs.xml")),
key=lambda g: g.name,
)
names = {g.name for g in glyph_infos}
categories = {g.category for g in glyph_infos}
subcategories = {g.subcategory for g in glyph_infos if g.subcategory is not None}
assert len(names) == len(glyph_infos), "Names aren't unique?"
codepoints = {}
for i, gi in enumerate(glyph_infos):
if gi.codepoint is None:
continue
codepoint = int(gi.codepoint, 16)
if codepoint not in codepoints:
codepoints[codepoint] = i
else:
print(
f"Multiple names are assigned 0x{codepoint:04x}, using the first one we saw"
)

dest_file = Path(__file__).parent.parent / "src" / "glyphslib_data.rs"

with open(dest_file, "w") as f:
f.write(
f"//! Glyph data generated from glyphsLib {glyphsLib.__version__} by {Path(__file__).name}\n"
)
f.write("//!\n")
f.write(f"//! {len(glyph_infos)} glyph metadata records taken from glyphsLib\n")
f.write("\n")
f.write("use crate::{Category as C, Subcategory as S, glyphdata::{gi, GlyphInfo}};\n")
f.write("// Sorted by name, has unique names, therefore safe to bsearch\n")

f.write("pub(crate) const GLYPH_INFO: &[GlyphInfo] = &[\n")
for gi in glyph_infos:
codepoint = "None"
if gi.codepoint is not None:
codepoint = f"Some(0x{gi.codepoint})"
subcategory = "None"
if gi.subcategory is not None:
subcategory = f"Some(S::{gi.subcategory})"
f.write(
f' gi("{gi.name}", C::{gi.category}, {subcategory}, {codepoint}),\n'
)

f.write("];\n")

f.write(
"// Sorted by codepoint, has unique codepoints, therefore safe to bsearch\n"
)
f.write("pub(crate) const CODEPOINT_TO_INFO_IDX: &[(u32, usize)] = &[\n")
for codepoint, i in sorted(codepoints.items()):
f.write(f" (0x{codepoint:04x}, {i}), // {glyph_infos[i].name}\n")

f.write("];\n")


if __name__ == "__main__":
main(None)
main()
48 changes: 23 additions & 25 deletions glyphs-reader/src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use std::hash::Hash;
use std::str::FromStr;
use std::{fs, path};

use crate::glyphdata::{Category, GlyphData, Subcategory};
use crate::glyphdata::GlyphData;
use crate::{Category, Subcategory};
use ascii_plist_derive::FromPlist;
use fontdrasil::types::WidthClass;
use kurbo::{Affine, Point, Vec2};
Expand Down Expand Up @@ -205,7 +206,7 @@ pub struct Glyph {
/// The right kerning group
pub right_kern: Option<SmolStr>,
pub category: Option<Category>,
pub sub_category: Subcategory,
pub sub_category: Option<Subcategory>,
rsheeter marked this conversation as resolved.
Show resolved Hide resolved
}

impl Glyph {
Expand All @@ -214,7 +215,7 @@ impl Glyph {
(self.category, self.sub_category),
(
Some(Category::Mark),
Subcategory::Nonspacing | Subcategory::SpacingCombining
Some(Subcategory::Nonspacing) | Some(Subcategory::SpacingCombining)
)
)
}
Expand Down Expand Up @@ -1898,7 +1899,7 @@ impl TryFrom<RawLayer> for Layer {

impl RawGlyph {
// we pass in the radix because it depends on the version, stored in the font struct
fn build(self, codepoint_radix: u32) -> Result<Glyph, Error> {
fn build(self, codepoint_radix: u32, glyph_data: &GlyphData) -> Result<Glyph, Error> {
let mut instances = Vec::new();
for layer in self.layers {
if layer.is_draft() {
Expand Down Expand Up @@ -1933,12 +1934,10 @@ impl RawGlyph {
.unwrap_or_default();

if category.is_none() || sub_category.is_none() {
if let Some((computed_category, computed_subcategory)) =
get_glyph_category(&self.glyphname, &codepoints)
{
if let Some(result) = glyph_data.query(&self.glyphname, Some(&codepoints)) {
// if they were manually set don't change them, otherwise do
category = category.or(Some(computed_category));
sub_category = sub_category.or(Some(computed_subcategory));
category = category.or(Some(result.category));
sub_category = sub_category.or(result.subcategory);
}
}

Expand All @@ -1950,20 +1949,11 @@ impl RawGlyph {
right_kern: self.kern_right,
unicode: codepoints,
category,
sub_category: sub_category.unwrap_or_default(),
sub_category,
})
}
}

// This will eventually need to be replaced with something that can handle
// custom GlyphData.xml files, as well as handle overrides that are part of the
// glyph source.
fn get_glyph_category(name: &str, codepoints: &BTreeSet<u32>) -> Option<(Category, Subcategory)> {
GlyphData::bundled()
.get_glyph(name, Some(codepoints))
.map(|info| (info.category, info.subcategory))
}

// https://github.com/googlefonts/glyphsLib/blob/24b4d340e4c82948ba121dcfe563c1450a8e69c9/Lib/glyphsLib/builder/constants.py#L186
#[rustfmt::skip]
static GLYPHS_TO_OPENTYPE_LANGUAGE_ID: &[(&str, i32)] = &[
Expand Down Expand Up @@ -2239,6 +2229,9 @@ impl TryFrom<RawFont> for Font {
from.v2_to_v3_names()?;
}

// TODO: this should be provided in a manner that allows for overrides
let glyph_data = GlyphData::glyphs_lib_data();

let radix = if from.is_v2() { 16 } else { 10 };
let glyph_order = parse_glyph_order(&from);

Expand Down Expand Up @@ -2277,7 +2270,10 @@ impl TryFrom<RawFont> for Font {

let mut glyphs = BTreeMap::new();
for raw_glyph in from.glyphs.into_iter() {
glyphs.insert(raw_glyph.glyphname.clone(), raw_glyph.build(radix)?);
glyphs.insert(
raw_glyph.glyphname.clone(),
raw_glyph.build(radix, &glyph_data)?,
);
}

let mut features = Vec::new();
Expand Down Expand Up @@ -2615,9 +2611,9 @@ mod tests {
default_master_idx, RawAxisUserToDesignMap, RawFeature, RawFont, RawFontMaster,
RawUserToDesignMapping,
},
glyphdata::{Category, Subcategory},
glyphdata::GlyphData,
plist::FromPlist,
Font, FontMaster, Node, Shape,
Category, Font, FontMaster, Node, Shape,
};
use std::{
collections::{BTreeMap, BTreeSet, HashSet},
Expand Down Expand Up @@ -3568,9 +3564,11 @@ mod tests {
..Default::default()
};

let cooked = raw.build(16).unwrap();
assert_eq!(cooked.category, Some(Category::Letter));
assert_eq!(cooked.sub_category, Subcategory::None);
let cooked = raw.build(16, &GlyphData::glyphs_lib_data()).unwrap();
assert_eq!(
(cooked.category, cooked.sub_category),
(Some(Category::Letter), None)
);
}

#[test]
Expand Down
Loading
Loading