-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading glyphsLib-derived data is showing up in profiles; try to make it not do so
- Loading branch information
Showing
9 changed files
with
669 additions
and
663 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,188 @@ | ||
"""Update bundled xml files | ||
"""Update bundled data derived from glyphsLib GlyphData.xml and GlyphData_Ideographs.xml. | ||
We try to match the behaviour of the python toolchain, so we want to ship the | ||
same data files as are currently bundled in glyphsLib. This script copies those | ||
files out of the currently active version of glyphsLib. | ||
This script copies files out of the currently active version of glyphsLib and generates | ||
Rust code for efficient access to the default data. Override files must be loaded separately | ||
from XML. We only generate code for the fields we actively use. | ||
Usage: | ||
python data/update.py | ||
python glyphs-reader/data/update.py | ||
""" | ||
|
||
import dataclasses | ||
from dataclasses import dataclass | ||
import glyphsLib | ||
from importlib import resources | ||
import os | ||
import shutil | ||
from io import StringIO | ||
from lxml import etree | ||
from pathlib import Path | ||
from textwrap import dedent | ||
from typing import Optional, Tuple | ||
|
||
def script_dir():
    """Return the absolute path of the directory that contains this script."""
    this_file = os.path.abspath(__file__)
    parent, _filename = os.path.split(this_file)
    return parent
|
||
def get_data_file(filepath):
    """Locate ``filepath`` inside the ``data`` directory of the installed glyphsLib.

    Returns the ``importlib.resources`` traversable for that entry; nothing is
    opened or read here.
    """
    package_root = resources.files(glyphsLib)
    data_dir = package_root.joinpath("data")
    return data_dir.joinpath(filepath)
@dataclass(frozen=True)
class GlyphInfo:
    """One glyph metadata record read from a GlyphData XML file.

    Frozen, so instances are hashable and can be collected into sets and
    de-duplicated across files.
    """

    # Raw value of the XML ``unicode`` attribute: a hexadecimal string without a
    # "0x" prefix (consumers parse it with ``int(codepoint, 16)``), or None when
    # the glyph has no codepoint.
    codepoint: Optional[str]
    # Glyph name (or an alternative name registered for the glyph).
    name: str
    # Category with spaces removed so it can be used as an identifier.
    category: str
    # Subcategory with spaces removed, or None when the glyph has none.
    subcategory: Optional[str]
|
||
|
||
def copy_data_files():
    """Copy glyphsLib's bundled GlyphData XML files next to this script.

    Each file is streamed in binary mode from the installed glyphsLib package
    into the directory returned by ``script_dir()``, replacing any existing copy.
    """
    destination_dir = script_dir()
    for filename in ("GlyphData.xml", "GlyphData_Ideographs.xml"):
        bundled = get_data_file(filename)
        destination = os.path.join(destination_dir, filename)
        with bundled.open("rb") as source:
            with open(destination, "wb") as dest:
                shutil.copyfileobj(source, dest)
def codename(name: Optional[str]) -> Optional[str]:
    """Turn a display name into an identifier by dropping every space character.

    ``None`` is passed through unchanged; only U+0020 spaces are removed, other
    whitespace is left alone.
    """
    return None if name is None else "".join(name.split(" "))
|
||
def write_version_file():
    """Record which glyphsLib release the copied XML files came from.

    Writes a two-line ``VERSION`` file into the directory returned by
    ``script_dir()``, overwriting any previous one.
    """
    contents = "".join(
        (
            f"XML files copied from glyphsLib version {glyphsLib.__version__}.\n",
            "(this file generated by update.py)\n",
        )
    )
    version_path = os.path.join(script_dir(), "VERSION")
    with open(version_path, "w") as out:
        out.write(contents)
|
||
def main(_):
    """Refresh the bundled XML files, then write the VERSION marker.

    The single positional argument is accepted but ignored.
    """
    for step in (copy_data_files, write_version_file):
        step()
def read_glyph_info(file: str) -> Tuple[GlyphInfo, ...]:
    """Read glyph metadata from one of glyphsLib's bundled GlyphData XML files.

    Args:
        file: Name of a file inside glyphsLib's ``data`` directory,
            e.g. ``"GlyphData.xml"``.

    Returns:
        One ``GlyphInfo`` per unique name, in first-seen order: every primary
        glyph name first, followed by each alternative name that does not
        collide with a name already recorded. Alternative-name records copy the
        category and subcategory of their glyph but carry no codepoint.

    Duplicate primary names and already-taken alternative names are reported on
    stdout and skipped; the first record seen wins.
    """
    resource = resources.files(glyphsLib).joinpath("data").joinpath(file)
    # Open through the resource API rather than the builtin open() so this also
    # works when the package data is not a plain filesystem path, and read bytes
    # so the XML parser handles the encoding instead of the locale default.
    with resource.open("rb") as f:
        tree = etree.parse(f)

    by_name = {}

    # Do a full pass to collect names
    for e in tree.xpath("//glyph"):
        info = GlyphInfo(
            e.attrib.get("unicode"),
            e.attrib["name"],
            codename(e.attrib["category"]),
            codename(e.attrib.get("subCategory")),
        )
        if info.name in by_name:
            print(f"We've already seen {info.name}!")
            continue
        by_name[info.name] = info

    # Then add alt_names where they don't overlap names
    for e in tree.xpath("//glyph[@altNames]"):
        primary = by_name[e.attrib["name"]]
        for raw_alt_name in e.attrib["altNames"].split(","):
            # The attribute is a comma separated list that may be written as
            # "a, b"; strip so the stored name carries no stray whitespace.
            # NOTE(review): glyphsLib appears to ignore spaces in altNames as
            # well - confirm against its glyphdata module.
            alt_name = raw_alt_name.strip()
            if not alt_name:
                # Empty entry, e.g. from a trailing comma.
                continue
            if alt_name in by_name:
                print(f'Ignoring alt name "{alt_name}", already taken')
                continue
            by_name[alt_name] = dataclasses.replace(
                primary, name=alt_name, codepoint=None
            )

    return tuple(by_name.values())
|
||
|
||
def _write_from_str_impl(f, type_name, variant_path, variants):
    """Write a Rust ``impl FromStr`` for ``type_name`` matching each variant by its own name.

    ``variant_path`` is the path prefix used in the match arms (``Self`` or the
    type name); ``variants`` is iterated in the order given.
    """
    f.write(f"impl FromStr for {type_name} {{\n")
    f.write(" type Err = SmolStr;\n\n")
    f.write(" fn from_str(s: &str) -> Result<Self, Self::Err> {\n")
    f.write(" match s {\n")
    for variant in variants:
        f.write(f' "{variant}" => Ok({variant_path}::{variant}),\n')
    f.write(" _ => Err(s.into()),\n")
    f.write(" }\n")
    f.write(" }\n")
    f.write("}\n")
    f.write("\n")


def main():
    """Generate ``src/glyphslib_data.rs`` from glyphsLib's bundled glyph data.

    Reads ``GlyphData.xml`` and ``GlyphData_Ideographs.xml``, merges and sorts the
    records by name, then writes Rust source containing:

    * the ``Category`` and ``Subcategory`` enums with ``FromStr`` impls,
    * ``GLYPH_INFO``, the records sorted by name,
    * ``CODEPOINT_TO_INFO_IDX``, (codepoint, index into ``GLYPH_INFO``) pairs
      sorted by codepoint.

    The destination is resolved relative to this script:
    ``<script dir>/../src/glyphslib_data.rs``.

    Raises:
        ValueError: if two distinct records share a name, which would make the
            name-sorted table unsafe to binary search.
    """
    glyph_infos = sorted(
        set(read_glyph_info("GlyphData.xml"))
        | set(read_glyph_info("GlyphData_Ideographs.xml")),
        key=lambda g: g.name,
    )
    names = {g.name for g in glyph_infos}
    # Raise explicitly instead of asserting: `python -O` strips assert
    # statements, and a table with duplicate names must never be written.
    if len(names) != len(glyph_infos):
        raise ValueError("Names aren't unique?")

    # Sorted once up front; each list is used for both the enum and its FromStr.
    categories = sorted({g.category for g in glyph_infos})
    subcategories = sorted(
        {g.subcategory for g in glyph_infos if g.subcategory is not None}
    )

    # Maps codepoint -> index into glyph_infos; the first name (in sorted order)
    # to claim a codepoint keeps it.
    codepoints = {}
    for i, gi in enumerate(glyph_infos):
        if gi.codepoint is None:
            continue
        codepoint = int(gi.codepoint, 16)
        if codepoint in codepoints:
            print(
                f"Multiple names are assigned 0x{codepoint:04x}, using the first one we saw"
            )
            continue
        codepoints[codepoint] = i

    dest_file = Path(__file__).parent.parent / "src" / "glyphslib_data.rs"

    # Explicit encoding so the generated source does not depend on the locale.
    with open(dest_file, "w", encoding="utf-8") as f:
        f.write(
            f"//! Glyph data generated from glyphsLib {glyphsLib.__version__} by {Path(__file__).name}\n"
        )
        f.write("//!\n")
        f.write(f"//! {len(glyph_infos)} glyph metadata records taken from glyphsLib\n")

        f.write(
            dedent(
                """
                use std::str::FromStr;
                use smol_str::SmolStr;
                use crate::glyphdata::GlyphInfo;
                /// The primary category for a given glyph
                ///
                /// Generated to ensure it matches the glyphsLib dataset.
                ///
                /// These categories are not the same as the unicode character categories.
                #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
                #[repr(u8)]
                pub enum Category {
                """
            )
        )
        for category in categories:
            f.write(f" {category},\n")
        f.write("}\n")

        _write_from_str_impl(f, "Category", "Self", categories)

        f.write(
            dedent(
                """
                /// The secondary category for a given glyph
                ///
                /// Generated to ensure it matches the glyphsLib dataset.
                #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
                #[repr(u8)]
                pub enum Subcategory {
                """
            )
        )
        for subcategory in subcategories:
            f.write(f" {subcategory},\n")
        f.write("}\n\n")

        _write_from_str_impl(f, "Subcategory", "Subcategory", subcategories)

        f.write("// Sorted by name, has unique names, therefore safe to bsearch\n")
        f.write("pub(crate) const GLYPH_INFO: &[GlyphInfo] = &[\n")
        for gi in glyph_infos:
            # The codepoint is emitted from the raw hex string, not re-formatted.
            codepoint = "None" if gi.codepoint is None else f"Some(0x{gi.codepoint})"
            subcategory = (
                "None"
                if gi.subcategory is None
                else f"Some(Subcategory::{gi.subcategory})"
            )
            f.write(
                f' GlyphInfo::new("{gi.name}", Category::{gi.category}, {subcategory}, {codepoint}),\n'
            )

        f.write("];\n")

        f.write(
            "// Sorted by codepoint, has unique codepoints, therefore safe to bsearch\n"
        )
        f.write("pub(crate) const CODEPOINT_TO_INFO_IDX: &[(u32, usize)] = &[\n")
        for codepoint, i in sorted(codepoints.items()):
            f.write(f" (0x{codepoint:04x}, {i}), // {glyph_infos[i].name}\n")

        f.write("];\n")
|
||
|
||
if __name__ == "__main__": | ||
main(None) | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.