diff --git a/glyphs-reader/Cargo.toml b/glyphs-reader/Cargo.toml
index e91ed7014..410cd46a0 100644
--- a/glyphs-reader/Cargo.toml
+++ b/glyphs-reader/Cargo.toml
@@ -35,10 +35,3 @@ bincode.workspace = true
 [dev-dependencies]
 pretty_assertions.workspace = true
 rstest.workspace = true
-
-[build-dependencies]
-quick-xml = "0.36"
-smol_str.workspace = true
-serde.workspace = true
-thiserror.workspace = true
-bincode.workspace = true
diff --git a/glyphs-reader/build.rs b/glyphs-reader/build.rs
deleted file mode 100644
index 216b13957..000000000
--- a/glyphs-reader/build.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-use std::env;
-use std::path::Path;
-
-include!("src/glyphdata/glyphdata_impl.rs");
-
-fn parse_xml_files() -> Result<Vec<GlyphInfo>, GlyphDataError> {
-    let mut one = parse_xml_file("data/GlyphData.xml")?;
-    let two = parse_xml_file("data/GlyphData_Ideographs.xml")?;
-    one.extend(two);
-    Ok(one)
-}
-
-fn parse_xml_file(path: &str) -> Result<Vec<GlyphInfo>, GlyphDataError> {
-    let Ok(bytes) = std::fs::read(path) else {
-        panic!("failed to read path '{path}'");
-    };
-    parse_entries(&bytes)
-}
-
-// tell cargo when to rerun this script
-fn register_dependencies() {
-    println!("cargo::rerun-if-changed=data");
-    println!("cargo::rerun-if-changed=src/glyphdata/glyphdata_impl.rs");
-}
-
-fn main() {
-    let out_dir = env::var_os("OUT_DIR").unwrap();
-    let dest_path = Path::new(&out_dir).join("glyphdata.bin");
-    let parsed = parse_xml_files().expect("failed to parse GlyphData xml files");
-    let bytes = bincode::serialize(&parsed).expect("bincode failed");
-    std::fs::write(dest_path, bytes).unwrap();
-
-    register_dependencies()
-}
diff --git a/glyphs-reader/data/update.py b/glyphs-reader/data/update.py
index d09c66501..9fb433a02 100644
--- a/glyphs-reader/data/update.py
+++ b/glyphs-reader/data/update.py
@@ -1,43 +1,188 @@
-"""Update bundled xml files
+"""Update bundled data derived from glyphsLib GlyphData.xml and GlyphData_Ideographs.xml.
 
-We try to match the behaviour of the python toolchain, so we want to ship the
-same data files as are currently bundled in glyphsLib. This script copies those
-files out of the currently active version of glyphsLib.
+This script copies files out of the currently active version of glyphsLib and generates
+Rust code for efficient access to the default data. Override files must be loaded separately
+from XML. We only generate code for the fields we actively use.
 
 Usage:
-    python data/update.py
+    python glyphs-reader/data/update.py
 """
 
+import dataclasses
+from dataclasses import dataclass
 import glyphsLib
 from importlib import resources
-import os
-import shutil
+from io import StringIO
+from lxml import etree
+from pathlib import Path
+from textwrap import dedent
+from typing import Optional, Tuple
 
-def script_dir():
-    return os.path.dirname(os.path.abspath(__file__))
 
-def get_data_file(filepath):
-    return resources.files(glyphsLib).joinpath("data").joinpath(filepath)
+@dataclass(frozen=True)
+class GlyphInfo:
+    codepoint: Optional[str]  # hex digits from the XML "unicode" attribute, parsed with int(..., 16) in main
+    name: str
+    category: str
+    subcategory: Optional[str]
 
 
-def copy_data_files():
-    target_dir = script_dir()
-    for target in ["GlyphData.xml", "GlyphData_Ideographs.xml"]:
-        file = get_data_file(target)
-        target = os.path.join(target_dir, target)
-        with file.open("rb") as source, open(target, "wb") as dest:
-            shutil.copyfileobj(source, dest)
+def codename(name: Optional[str]) -> Optional[str]:
+    if name is None:
+        return None
+    return name.replace(" ", "")
 
-def write_version_file():
-    version = glyphsLib.__version__
-    with open(os.path.join(script_dir(), 'VERSION'), 'w') as f:
-        f.write(f"XML files copied from glyphsLib version {version}.\n"
-                "(this file generated by update.py)\n")
 
-def main(_):
-    copy_data_files()
-    write_version_file()
+def read_glyph_info(file: str) -> Tuple[GlyphInfo, ...]:
+    """Read one glyphsLib data file; return a GlyphInfo per name, then per unclaimed alt name."""
+    source = resources.files(glyphsLib).joinpath("data").joinpath(file)
+    with source.open("rb") as f:  # Traversable.open works even when the package is not on disk
+        tree = etree.parse(f)
+    by_name = {}
+
+    # Do a full pass to collect names
+    for e in tree.xpath("//glyph"):
+        info = GlyphInfo(
+            e.attrib.get("unicode", None),
+            e.attrib["name"],
+            codename(e.attrib["category"]),
+            codename(e.attrib.get("subCategory", None)),
+        )
+        if info.name not in by_name:
+            by_name[info.name] = info
+        else:
+            print(f"We've already seen {info.name}!")
+
+    # Then add alt_names (comma separated, whitespace trimmed) where they don't overlap names
+    for e in tree.xpath("//glyph[@altNames]"):
+        for alt_name in (n.strip() for n in e.attrib["altNames"].split(",")):
+            if alt_name in by_name:
+                print(f'Ignoring alt name "{alt_name}", already taken')
+                continue
+            by_name[alt_name] = dataclasses.replace(
+                by_name[e.attrib["name"]], name=alt_name, codepoint=None
+            )
+
+    return tuple(by_name.values())
+
+
+def main():
+    glyph_infos = sorted(
+        set(read_glyph_info("GlyphData.xml"))
+        | set(read_glyph_info("GlyphData_Ideographs.xml")),
+        key=lambda g: g.name,
+    )
+    names = {g.name for g in glyph_infos}
+    categories = {g.category for g in glyph_infos}
+    subcategories = {g.subcategory for g in glyph_infos if g.subcategory is not None}
+    assert len(names) == len(glyph_infos), "Names aren't unique?"
+    codepoints = {}
+    for i, gi in enumerate(glyph_infos):
+        if gi.codepoint is None:
+            continue
+        codepoint = int(gi.codepoint, 16)
+        if codepoint not in codepoints:
+            codepoints[codepoint] = i
+        else:
+            print(
+                f"Multiple names are assigned 0x{codepoint:04x}, using the first one we saw"
+            )
+
+    dest_file = Path(__file__).parent.parent / "src" / "glyphslib_data.rs"
+
+    with open(dest_file, "w") as f:
+        f.write(
+            f"//! Glyph data generated from glyphsLib {glyphsLib.__version__} by {Path(__file__).name}\n"
+        )
+        f.write("//!\n")
+        f.write(f"//! {len(glyph_infos)} glyph metadata records taken from glyphsLib\n")
+
+        f.write(
+            dedent(
+                """
+                use std::str::FromStr;
+                use smol_str::SmolStr;
+                use crate::glyphdata::GlyphInfo;
+
+                /// The primary category for a given glyph
+                ///
+                /// Generated to ensure it matches the glyphsLib dataset.
+                ///
+                /// These categories are not the same as the unicode character categories.
+                #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+                #[repr(u8)]
+                pub enum Category {
+			"""
+            )
+        )
+        for category in sorted(categories):
+            f.write(f"    {category},\n")
+        f.write("}\n")
+
+        f.write("impl FromStr for Category {\n")
+        f.write("    type Err = SmolStr;\n\n")
+        f.write("    fn from_str(s: &str) -> Result<Self, Self::Err> {\n")
+        f.write("        match s {\n")
+        for category in sorted(categories):
+            f.write(f'            "{category}" => Ok(Self::{category}),\n')
+        f.write(f"            _ => Err(s.into()),\n")
+        f.write("        }\n")
+        f.write("    }\n")
+        f.write("}\n")
+        f.write("\n")
+
+        f.write(
+            dedent(
+                """
+			/// The secondary category for a given glyph
+			///
+			/// Generated to ensure it matches the glyphsLib dataset.
+			#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+			#[repr(u8)]
+			pub enum Subcategory {
+			"""
+            )
+        )
+        for subcategory in sorted(subcategories):
+            f.write(f"    {subcategory},\n")
+        f.write("}\n\n")
+
+        f.write("impl FromStr for Subcategory {\n")
+        f.write("    type Err = SmolStr;\n\n")
+        f.write("    fn from_str(s: &str) -> Result<Self, Self::Err> {\n")
+        f.write("        match s {\n")
+        for subcategory in sorted(subcategories):
+            f.write(f'            "{subcategory}" => Ok(Subcategory::{subcategory}),\n')
+        f.write(f"            _ => Err(s.into()),\n")
+        f.write("        }\n")
+        f.write("    }\n")
+        f.write("}\n")
+        f.write("\n")
+
+        f.write("// Sorted by name, has unique names, therefore safe to bsearch\n")
+        f.write("pub(crate) const GLYPH_INFO: &[GlyphInfo] = &[\n")
+        for gi in glyph_infos:
+            codepoint = "None"
+            if gi.codepoint is not None:
+                codepoint = f"Some(0x{gi.codepoint})"
+            subcategory = "None"
+            if gi.subcategory is not None:
+                subcategory = f"Some(Subcategory::{gi.subcategory})"
+            f.write(
+                f'    GlyphInfo::new("{gi.name}", Category::{gi.category}, {subcategory}, {codepoint}),\n'
+            )
+
+        f.write("];\n")
+
+        f.write(
+            "// Sorted by codepoint, has unique codepoints, therefore safe to bsearch\n"
+        )
+        f.write("pub(crate) const CODEPOINT_TO_INFO_IDX: &[(u32, usize)] = &[\n")
+        for codepoint, i in sorted(codepoints.items()):
+            f.write(f"    (0x{codepoint:04x}, {i}), // {glyph_infos[i].name}\n")
+
+        f.write("];\n")
 
 
 if __name__ == "__main__":
-    main(None)
+    main()
diff --git a/glyphs-reader/src/font.rs b/glyphs-reader/src/font.rs
index 569f84760..d624b4f26 100644
--- a/glyphs-reader/src/font.rs
+++ b/glyphs-reader/src/font.rs
@@ -11,7 +11,8 @@ use std::hash::Hash;
 use std::str::FromStr;
 use std::{fs, path};
 
-use crate::glyphdata::{Category, GlyphData, Subcategory};
+use crate::glyphdata::GlyphData;
+use crate::{Category, Subcategory};
 use ascii_plist_derive::FromPlist;
 use fontdrasil::types::WidthClass;
 use kurbo::{Affine, Point, Vec2};
@@ -205,7 +206,7 @@ pub struct Glyph {
     /// The right kerning group
     pub right_kern: Option<SmolStr>,
     pub category: Option<Category>,
-    pub sub_category: Subcategory,
+    pub sub_category: Option<Subcategory>,
 }
 
 impl Glyph {
@@ -214,7 +215,7 @@ impl Glyph {
             (self.category, self.sub_category),
             (
                 Some(Category::Mark),
-                Subcategory::Nonspacing | Subcategory::SpacingCombining
+                Some(Subcategory::Nonspacing) | Some(Subcategory::SpacingCombining)
             )
         )
     }
@@ -1898,7 +1899,7 @@ impl TryFrom<RawLayer> for Layer {
 
 impl RawGlyph {
     // we pass in the radix because it depends on the version, stored in the font struct
-    fn build(self, codepoint_radix: u32) -> Result<Glyph, Error> {
+    fn build(self, codepoint_radix: u32, glyph_data: &GlyphData) -> Result<Glyph, Error> {
         let mut instances = Vec::new();
         for layer in self.layers {
             if layer.is_draft() {
@@ -1933,12 +1934,12 @@ impl RawGlyph {
             .unwrap_or_default();
 
         if category.is_none() || sub_category.is_none() {
-            if let Some((computed_category, computed_subcategory)) =
-                get_glyph_category(&self.glyphname, &codepoints)
+            if let Some((computed_category, computed_subcategory, _)) =
+                glyph_data.query(&self.glyphname, Some(&codepoints))
             {
                 // if they were manually set don't change them, otherwise do
                 category = category.or(Some(computed_category));
-                sub_category = sub_category.or(Some(computed_subcategory));
+                sub_category = sub_category.or(computed_subcategory);
             }
         }
 
@@ -1950,20 +1951,11 @@ impl RawGlyph {
             right_kern: self.kern_right,
             unicode: codepoints,
             category,
-            sub_category: sub_category.unwrap_or_default(),
+            sub_category,
         })
     }
 }
 
-// This will eventually need to be replaced with something that can handle
-// custom GlyphData.xml files, as well as handle overrides that are part of the
-// glyph source.
-fn get_glyph_category(name: &str, codepoints: &BTreeSet<u32>) -> Option<(Category, Subcategory)> {
-    GlyphData::bundled()
-        .get_glyph(name, Some(codepoints))
-        .map(|info| (info.category, info.subcategory))
-}
-
 // https://github.com/googlefonts/glyphsLib/blob/24b4d340e4c82948ba121dcfe563c1450a8e69c9/Lib/glyphsLib/builder/constants.py#L186
 #[rustfmt::skip]
 static GLYPHS_TO_OPENTYPE_LANGUAGE_ID: &[(&str, i32)] = &[
@@ -2239,6 +2231,9 @@ impl TryFrom<RawFont> for Font {
             from.v2_to_v3_names()?;
         }
 
+        // TODO: this should be provided in a manner that allows for overrides
+        let glyph_data = GlyphData::glyphs_lib_data();
+
         let radix = if from.is_v2() { 16 } else { 10 };
         let glyph_order = parse_glyph_order(&from);
 
@@ -2277,7 +2272,10 @@ impl TryFrom<RawFont> for Font {
 
         let mut glyphs = BTreeMap::new();
         for raw_glyph in from.glyphs.into_iter() {
-            glyphs.insert(raw_glyph.glyphname.clone(), raw_glyph.build(radix)?);
+            glyphs.insert(
+                raw_glyph.glyphname.clone(),
+                raw_glyph.build(radix, &glyph_data)?,
+            );
         }
 
         let mut features = Vec::new();
@@ -2615,9 +2613,9 @@ mod tests {
             default_master_idx, RawAxisUserToDesignMap, RawFeature, RawFont, RawFontMaster,
             RawUserToDesignMapping,
         },
-        glyphdata::{Category, Subcategory},
+        glyphdata::GlyphData,
         plist::FromPlist,
-        Font, FontMaster, Node, Shape,
+        Category, Font, FontMaster, Node, Shape,
     };
     use std::{
         collections::{BTreeMap, BTreeSet, HashSet},
@@ -3568,9 +3566,11 @@ mod tests {
             ..Default::default()
         };
 
-        let cooked = raw.build(16).unwrap();
-        assert_eq!(cooked.category, Some(Category::Letter));
-        assert_eq!(cooked.sub_category, Subcategory::None);
+        let cooked = raw.build(16, &GlyphData::glyphs_lib_data()).unwrap();
+        assert_eq!(
+            (cooked.category, cooked.sub_category),
+            (Some(Category::Letter), None)
+        );
     }
 
     #[test]
diff --git a/glyphs-reader/src/glyphdata.rs b/glyphs-reader/src/glyphdata.rs
index 6ecfcb8ca..30d9ed092 100644
--- a/glyphs-reader/src/glyphdata.rs
+++ b/glyphs-reader/src/glyphdata.rs
@@ -3,145 +3,379 @@
 //! This module provides access to glyph info extracted from bundled
 //! (and potentially user-provided) data files.
 
-// NOTE: we define the types and parsing code in a separate file, so that
-// we can borrow it in our build.rs script without causing a cycle
-mod glyphdata_impl;
+use quick_xml::{
+    events::{BytesStart, Event},
+    Reader,
+};
 use std::{
-    borrow::Cow,
-    collections::{BTreeSet, HashMap, HashSet},
-    path::Path,
-    sync::OnceLock,
+    collections::{BTreeSet, HashMap},
+    num::ParseIntError,
+    path::{Path, PathBuf},
+    str::FromStr,
 };
 
-pub use glyphdata_impl::*;
 use icu_properties::GeneralCategory;
 
 use smol_str::SmolStr;
 
-static BUNDLED_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/glyphdata.bin"));
+use crate::{glyphslib_data, Category, Subcategory};
 
 /// A queryable set of glyph data
 ///
-/// This is generally expensive to create, and is intended to be cached, or
-/// used behind a OnceCell. It is never modified after initial creation.
+/// Always includes static data from glyphsLib. Optionally includes a set of override values as well.
+///
+/// Access via [`GlyphData::glyphs_lib_data`] is cheap. Instances created with overrides
+/// are more expensive.
 pub struct GlyphData {
-    // The info for all the glyphs we know of.
-    data: Vec<GlyphInfo>,
-    // the values in all maps are indices into the `data` vec. we use u32 to save space.
-    name_map: HashMap<SmolStr, u32>,
-    unicode_map: HashMap<u32, u32>,
-    alt_name_map: HashMap<SmolStr, u32>,
+    // Sorted by name, unique names, therefore safe to bsearch
+    data: &'static [GlyphInfo],
+    // Sorted by codepoint, unique codepoints, therefore safe to bsearch
+    codepoint_to_data_index: &'static [(u32, usize)],
+
+    // override-names are preferred to names in data
+    overrides: Option<HashMap<SmolStr, GlyphOverride>>,
+    overrrides_by_codepoint: Option<HashMap<u32, SmolStr>>,
 }
 
 impl GlyphData {
-    /// Return the default glyph data set, derived from GlyphData.xml files
-    pub fn bundled() -> &'static GlyphData {
-        static GLYPH_DATA: OnceLock<GlyphData> = OnceLock::new();
-        GLYPH_DATA.get_or_init(|| GlyphData::new(None).unwrap())
+    /// Return the default glyph data set, derived from Python glyphsLib resources
+    pub fn glyphs_lib_data() -> Self {
+        Self {
+            data: glyphslib_data::GLYPH_INFO,
+            codepoint_to_data_index: glyphslib_data::CODEPOINT_TO_INFO_IDX,
+            overrides: None,
+            overrrides_by_codepoint: None,
+        }
     }
 
-    /// Create a new data set, optionally loading user provided overrides
-    pub fn new(user_overrides: Option<&Path>) -> Result<Self, GlyphDataError> {
-        let user_overrides = user_overrides
-            .map(|path| {
-                let bytes = std::fs::read(path).map_err(|err| GlyphDataError::UserFile {
-                    path: path.to_owned(),
-                    reason: err.kind(),
-                });
-                bytes.and_then(|xml| parse_entries(&xml))
-            })
-            .transpose()?;
-        let bundled = load_bundled_data();
-        let all_entries = match user_overrides {
-            Some(user_overrides) => merge_data(bundled, user_overrides),
-            None => bundled,
-        };
+    /// Create a new data set with user provided overrides read from a GlyphData-style XML file
+    pub fn with_override_file(override_file: &Path) -> Result<Self, GlyphDataError> {
+        let bytes = std::fs::read(override_file).map_err(|err| GlyphDataError::UserFile {
+            path: override_file.to_owned(),
+            reason: err.kind(),
+        })?;
+        let overrides = parse_entries(&bytes)?;
+        GlyphData::with_overrides(overrides)
+    }
 
-        Ok(Self::new_impl(all_entries))
+    /// Create a new data set with user provided overrides
+    pub(crate) fn with_overrides(
+        overrides: HashMap<SmolStr, GlyphOverride>,
+    ) -> Result<Self, GlyphDataError> {
+        let override_by_codepoint = overrides
+            .iter()
+            .filter_map(|(k, v)| v.codepoint.map(|cp| (cp, k.clone())))
+            .collect();
+        Ok(Self {
+            data: glyphslib_data::GLYPH_INFO,
+            codepoint_to_data_index: glyphslib_data::CODEPOINT_TO_INFO_IDX,
+            overrides: Some(overrides),
+            overrrides_by_codepoint: Some(override_by_codepoint),
+        })
     }
+}
 
-    fn new_impl(entries: Vec<GlyphInfo>) -> Self {
-        let mut name_map = HashMap::with_capacity(entries.len());
-        let mut unicode_map = HashMap::with_capacity(entries.len());
-        let mut alt_name_map = HashMap::new();
+/// The subset of GlyphData.xml or GlyphData_Ideographs.xml we care about
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct GlyphInfo {
+    name: &'static str,
+    category: Category,
+    subcategory: Option<Subcategory>,
+    codepoint: Option<u32>,
+}
 
-        for (i, entry) in entries.iter().enumerate() {
-            name_map.insert(entry.name.clone(), i as u32);
-            if let Some(cp) = entry.unicode {
-                unicode_map.insert(cp, i as _);
-            }
-            for alt in &entry.alt_names {
-                alt_name_map.insert(alt.clone(), i as _);
+impl GlyphInfo {
+    pub(crate) const fn new(
+        name: &'static str,
+        category: Category,
+        subcategory: Option<Subcategory>,
+        codepoint: Option<u32>,
+    ) -> Self {
+        Self {
+            name,
+            category,
+            subcategory,
+            codepoint,
+        }
+    }
+}
+
+/// The category, subcategory, and codepoint to use when specified by an override
+pub(crate) struct GlyphOverride {
+    category: Category,
+    subcategory: Option<Subcategory>,
+    codepoint: Option<u32>,
+}
+
+#[derive(Clone, Debug, thiserror::Error)]
+pub enum GlyphDataError {
+    #[error("Couldn't read user file at '{path}': '{reason}'")]
+    UserFile {
+        path: PathBuf,
+        reason: std::io::ErrorKind,
+    },
+    #[error("Error parsing XML: '{0}'")]
+    ReaderError(#[from] quick_xml::Error),
+    #[error("Error parsing XML attribute: '{0}'")]
+    XmlAttributeError(#[from] quick_xml::events::attributes::AttrError),
+    #[error("Unknown category '{0}'")]
+    InvalidCategory(SmolStr),
+    #[error("Unknown subcategory '{0}'")]
+    InvalidSubcategory(SmolStr),
+    #[error("the XML input did not start with a <glyphdata> tag")]
+    WrongFirstElement,
+    #[error("Missing required attribute '{missing}' in '{attributes}'")]
+    MissingRequiredAttribute {
+        attributes: String,
+        missing: &'static str,
+    },
+    #[error("Invalid unicode value '{raw}': '{inner}'")]
+    InvalidUnicode { raw: String, inner: ParseIntError },
+    #[error("Unexpected attribute '{0}'")]
+    UnknownAttribute(String),
+}
+
+impl GlyphDataError {
+    // a little helper here makes our parsing code cleaner
+    fn missing_attr(name: &'static str, raw_attrs: &[u8]) -> Self {
+        let attributes = String::from_utf8_lossy(raw_attrs).into_owned();
+        Self::MissingRequiredAttribute {
+            attributes,
+            missing: name,
+        }
+    }
+}
+
+/// Parse glyph info entries out of a GlyphData xml file.
+pub(crate) fn parse_entries(xml: &[u8]) -> Result<HashMap<SmolStr, GlyphOverride>, GlyphDataError> {
+    fn check_and_advance_past_preamble(reader: &mut Reader<&[u8]>) -> Result<(), GlyphDataError> {
+        loop {
+            let event = reader.read_event()?;
+            match event {
+                Event::Comment(_) => (),
+                Event::Decl(_) => (),
+                Event::DocType(_) => (),
+                Event::Start(start) if start.name().as_ref() == b"glyphData" => return Ok(()),
+                _other => {
+                    return Err(GlyphDataError::WrongFirstElement);
+                }
             }
         }
+    }
 
-        Self {
-            data: entries,
-            name_map,
-            unicode_map,
-            alt_name_map,
+    let mut reader = Reader::from_reader(xml);
+    reader.config_mut().trim_text(true);
+
+    check_and_advance_past_preamble(&mut reader)?;
+
+    let mut by_name = HashMap::new();
+    let mut alt_names = Vec::new();
+    for result in
+        iter_rows(&mut reader).map(|row| row.map_err(Into::into).and_then(parse_glyph_xml))
+    {
+        let info = result?;
+        by_name.insert(
+            info.name.clone(),
+            GlyphOverride {
+                category: info.category,
+                subcategory: info.subcategory,
+                codepoint: info.codepoint,
+            },
+        );
+        for alt in info.alt_names {
+            alt_names.push((
+                alt,
+                GlyphOverride {
+                    category: info.category,
+                    subcategory: info.subcategory,
+                    codepoint: None,
+                },
+            ));
+        }
+    }
+
+    // apply alts after to ensure they can't steal "real" names
+    for (name, value) in alt_names {
+        by_name.entry(name).or_insert(value);
+    }
+
+    Ok(by_name)
+}
+
+fn iter_rows<'a, 'b: 'a>(
+    reader: &'b mut Reader<&'a [u8]>,
+) -> impl Iterator<Item = Result<BytesStart<'a>, quick_xml::Error>> + 'a {
+    std::iter::from_fn(|| match reader.read_event() {
+        Err(e) => Some(Err(e)),
+        Ok(Event::Empty(start)) => Some(Ok(start)),
+        _ => None,
+    })
+}
+
+struct GlyphInfoFromXml {
+    name: SmolStr,
+    alt_names: Vec<SmolStr>,
+    category: Category,
+    subcategory: Option<Subcategory>,
+    codepoint: Option<u32>,
+}
+
+fn parse_glyph_xml(item: BytesStart) -> Result<GlyphInfoFromXml, GlyphDataError> {
+    let mut name = None;
+    let mut category = None;
+    let mut subcategory = None;
+    let mut unicode = None;
+    let mut alt_names = None;
+
+    for attr in item.attributes() {
+        let attr = attr?;
+        let value = attr.unescape_value()?;
+        match attr.key.as_ref() {
+            b"name" => name = Some(value),
+            b"category" => category = Some(value),
+            b"subCategory" => subcategory = Some(value),
+            b"unicode" => unicode = Some(value),
+            b"altNames" => alt_names = Some(value),
+            b"production" | b"unicodeLegacy" | b"case" | b"direction" | b"script"
+            | b"description" => (),
+            other => {
+                return Err(GlyphDataError::UnknownAttribute(
+                    String::from_utf8_lossy(other).into_owned(),
+                ))
+            }
         }
     }
 
+    // now we've found some values, let's finalize them
+    let name = name
+        .map(SmolStr::new)
+        .ok_or_else(|| GlyphDataError::missing_attr("name", item.attributes_raw()))?;
+    let category = category
+        .ok_or_else(|| GlyphDataError::missing_attr("category", item.attributes_raw()))
+        .and_then(|cat| {
+            Category::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidCategory)
+        })?;
+    let subcategory = subcategory
+        .map(|cat| Subcategory::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidSubcategory))
+        .transpose()?;
+    let codepoint = unicode
+        .map(|s| {
+            u32::from_str_radix(&s, 16).map_err(|inner| GlyphDataError::InvalidUnicode {
+                raw: s.into_owned(),
+                inner,
+            })
+        })
+        .transpose()?;
+    let alt_names = alt_names
+        .map(|names| {
+            names
+                .as_ref()
+                .split(',')
+                .map(|name| SmolStr::from(name.trim()))
+                .collect()
+        })
+        .unwrap_or_default();
+
+    Ok(GlyphInfoFromXml {
+        name,
+        alt_names,
+        category,
+        subcategory,
+        codepoint,
+    })
+}
+
+impl GlyphData {
     /// Get the info for the given name/codepoints, attempting to synthesize it if necessary
     ///
-    /// If this name or these unicode values were included in the bundled data,
-    /// that will be returned; otherwise we will attempt to compute the value
-    /// by performing various heuristics based on the name.
+    /// Returns, from most to least preferred:
+    ///
+    /// 1. The matching override value
+    /// 1. The matching value from bundled data
+    /// 1. A computed value based on name heuristics
     ///
     // See https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L94
-    pub fn get_glyph(
+    pub fn query(
         &self,
         name: &str,
         codepoints: Option<&BTreeSet<u32>>,
-    ) -> Option<Cow<GlyphInfo>> {
-        if let Some(info) = self.get_by_name(name).or_else(|| {
-            codepoints
-                .into_iter()
-                .flat_map(|cps| cps.iter())
-                .find_map(|cp| self.get_by_codepoint(*cp))
-        }) {
-            return Some(Cow::Borrowed(info));
-        }
-
-        // we don't have info for this glyph: can we synthesize it?
-        // TODO: python does production name here.
-        // see https://github.com/googlefonts/fontc/issues/780
-
-        let (category, subcategory) = self.construct_category(name)?;
-        Some(Cow::Owned(GlyphInfo {
-            name: name.into(),
-            category,
-            subcategory,
-            unicode: None,
-            production: None,
-            alt_names: Default::default(),
-        }))
+    ) -> Option<(Category, Option<Subcategory>, Option<u32>)> {
+        self.query_no_synthesis(name, codepoints)
+            // we don't have info for this glyph: can we synthesize it?
+            .or_else(|| self.construct_category(name))
     }
 
-    /// Look up info for a glyph by name
-    ///
-    /// This checks primary names first, and alternates afterwards.
+    /// As [`Self::query`] but without a fallback to computed values.
     ///
-    /// Note: this is only checking the loaded data, it does not handle
-    /// computing info if it is missing.
-    fn get_by_name(&self, name: impl AsRef<str>) -> Option<&GlyphInfo> {
-        let name = name.as_ref();
-        self.name_map
-            .get(name)
-            .or_else(|| self.alt_name_map.get(name))
-            .and_then(|idx| self.data.get(*idx as usize))
+    /// Exists to enable result synthesis to query.
+    fn query_no_synthesis(
+        &self,
+        name: &str,
+        codepoints: Option<&BTreeSet<u32>>,
+    ) -> Option<(Category, Option<Subcategory>, Option<u32>)> {
+        // Overrides win: try the name first, then each codepoint in order
+        if let (Some(overrides), Some(overrides_by_codepoint)) = (
+            self.overrides.as_ref(),
+            self.overrrides_by_codepoint.as_ref(),
+        ) {
+            // `SmolStr: Borrow<str>`, so the map can be queried with `&str` directly
+            let override_result = overrides.get(name).or_else(|| {
+                codepoints
+                    .into_iter()
+                    .flat_map(|cps| cps.iter())
+                    .find_map(|cp: &u32| {
+                        overrides_by_codepoint
+                            .get(cp)
+                            .and_then(|n| overrides.get(n))
+                    })
+            });
+            if let Some(override_result) = override_result {
+                return Some((
+                    override_result.category,
+                    override_result.subcategory,
+                    override_result.codepoint,
+                ));
+            }
+        }
+
+        // No override: bsearch the name-sorted table, then the codepoint-sorted index
+        let info = self
+            .data
+            .binary_search_by(|gi| gi.name.cmp(name))
+            .ok()
+            .map(|i| &self.data[i])
+            .or_else(|| {
+                codepoints
+                    .into_iter()
+                    .flat_map(|cps| cps.iter())
+                    .find_map(|cp| {
+                        self.codepoint_to_data_index
+                            .binary_search_by(|(info_cp, _)| info_cp.cmp(cp))
+                            .ok()
+                            .map(|i| &self.data[self.codepoint_to_data_index[i].1])
+                    })
+            });
+        info.map(|info| (info.category, info.subcategory, info.codepoint))
     }
 
-    /// Look up info for a glyph by codepoint
-    fn get_by_codepoint(&self, codepoint: u32) -> Option<&GlyphInfo> {
-        self.unicode_map
-            .get(&codepoint)
-            .and_then(|idx| self.data.get(*idx as usize))
+    fn contains_name(&self, name: &str) -> bool {
+        // `SmolStr: Borrow<str>`, so the override map can be probed without building a key
+        let overridden = self
+            .overrides
+            .as_ref()
+            .is_some_and(|overrides| overrides.contains_key(name));
+        // otherwise bsearch the static table, which is sorted by name
+        overridden || self.data.binary_search_by(|gi| gi.name.cmp(name)).is_ok()
     }
 
     // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L199
-    fn construct_category(&self, name: &str) -> Option<(Category, Subcategory)> {
+    fn construct_category(
+        &self,
+        name: &str,
+    ) -> Option<(Category, Option<Subcategory>, Option<u32>)> {
+        // TODO: python does production name here.
+        // see https://github.com/googlefonts/fontc/issues/780
+
         // in glyphs.app '_' prefix means "no export"
         if name.starts_with('_') {
             return None;
@@ -150,30 +384,30 @@ impl GlyphData {
             .split_glyph_suffix(name)
             .map(|(base, _)| base)
             .unwrap_or(name);
-        if let Some(info) = self.get_by_name(base_name) {
-            return Some((info.category, info.subcategory));
+        if let Some(info) = self.query_no_synthesis(base_name, None) {
+            return Some(info);
         }
 
         if let Some(base_names) = self.split_ligature_glyph_name(base_name) {
             let base_names_attributes: Vec<_> = base_names
                 .iter()
-                .map(|name| self.get_by_name(name))
+                .filter_map(|name| self.query_no_synthesis(name, None))
                 .collect();
-            if let Some(first_attr) = base_names_attributes.first().and_then(Option::as_ref) {
+            if let Some(first_attr) = base_names_attributes.first() {
                 // if first is mark, we're a mark
-                if first_attr.category == Category::Mark {
-                    return Some((Category::Mark, first_attr.subcategory));
-                } else if first_attr.category == Category::Letter {
+                if first_attr.0 == Category::Mark {
+                    return Some((Category::Mark, first_attr.1, None));
+                } else if first_attr.0 == Category::Letter {
                     // if first is letter and rest are marks/separators, we use info from first
                     if base_names_attributes
                         .iter()
                         .skip(1)
-                        .filter_map(|attr| attr.map(|attr| attr.category))
+                        .map(|(cat, ..)| cat)
                         .all(|cat| matches!(cat, Category::Mark | Category::Separator))
                     {
-                        return Some((first_attr.category, first_attr.subcategory));
+                        return Some((first_attr.0, first_attr.1, None));
                     } else {
-                        return Some((Category::Letter, Subcategory::Ligature));
+                        return Some((Category::Letter, Some(Subcategory::Ligature), None));
                     }
                 }
             }
@@ -185,7 +419,9 @@ impl GlyphData {
 
     // this doesn't need a &self param, but we want it locally close to the
     // code that calls it, so we'll make it a type method :shrug:
-    fn construct_category_via_agl(base_name: &str) -> Option<(Category, Subcategory)> {
+    fn construct_category_via_agl(
+        base_name: &str,
+    ) -> Option<(Category, Option<Subcategory>, Option<u32>)> {
         if let Some(first_char) = fontdrasil::agl::glyph_name_to_unicode(base_name)
             .chars()
             .next()
@@ -195,15 +431,15 @@ impl GlyphData {
             // Exception: Something like "one_two" should be a (_, Ligature),
             // "acutecomb_brevecomb" should however stay (Mark, Nonspacing).
             if base_name.contains('_') && category != Category::Mark {
-                return Some((category, Subcategory::Ligature));
+                return Some((category, Some(Subcategory::Ligature), None));
             } else {
-                return Some((category, subcategory));
+                return Some((category, subcategory, None));
             }
         }
         None
     }
 
-    fn split_glyph_suffix<'a>(&self, name: &'a str) -> Option<(&'a str, &'a str)> {
+    fn split_glyph_suffix<'n>(&self, name: &'n str) -> Option<(&'n str, &'n str)> {
         let multi_suffix = name.bytes().filter(|b| *b == b'.').count() > 1;
         if multi_suffix {
             // with multiple suffixes, try adding them one at a time and seeing if
@@ -217,7 +453,7 @@ impl GlyphData {
                 .skip(1)
             {
                 let (base, suffix) = name.split_at(idx);
-                if self.get_by_name(base).is_some() {
+                if self.contains_name(base) {
                     return Some((base, suffix));
                 }
             }
@@ -260,7 +496,7 @@ impl GlyphData {
 
             let new_part = smol_str::format_smolstr!("{part}-{script}");
             // if non-suffixed exists but suffixed doesn't, keep non-suffixed
-            if self.get_by_name(part.as_ref()).is_some() && self.get_by_name(&new_part).is_none() {
+            if self.contains_name(part.as_ref()) && !self.contains_name(&new_part) {
                 continue;
             }
             *part = new_part;
@@ -270,212 +506,201 @@ impl GlyphData {
 }
 
 // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L261
-fn category_from_icu(c: char) -> (Category, Subcategory) {
+fn category_from_icu(c: char) -> (Category, Option<Subcategory>) {
     match icu_properties::maps::general_category().get(c) {
-        GeneralCategory::Unassigned | GeneralCategory::OtherSymbol => {
-            (Category::Symbol, Subcategory::None)
-        }
+        GeneralCategory::Unassigned | GeneralCategory::OtherSymbol => (Category::Symbol, None),
         GeneralCategory::UppercaseLetter
         | GeneralCategory::LowercaseLetter
         | GeneralCategory::TitlecaseLetter
-        | GeneralCategory::OtherLetter => (Category::Letter, Subcategory::None),
-        GeneralCategory::ModifierLetter => (Category::Letter, Subcategory::Modifier),
-        GeneralCategory::NonspacingMark => (Category::Mark, Subcategory::Nonspacing),
-        GeneralCategory::SpacingMark => (Category::Mark, Subcategory::SpacingCombining),
-        GeneralCategory::EnclosingMark => (Category::Mark, Subcategory::Enclosing),
+        | GeneralCategory::OtherLetter => (Category::Letter, None),
+        GeneralCategory::ModifierLetter => (Category::Letter, Some(Subcategory::Modifier)),
+        GeneralCategory::NonspacingMark => (Category::Mark, Some(Subcategory::Nonspacing)),
+        GeneralCategory::SpacingMark => (Category::Mark, Some(Subcategory::SpacingCombining)),
+        GeneralCategory::EnclosingMark => (Category::Mark, Some(Subcategory::Enclosing)),
         GeneralCategory::DecimalNumber | GeneralCategory::OtherNumber => {
-            (Category::Number, Subcategory::DecimalDigit)
+            (Category::Number, Some(Subcategory::DecimalDigit))
         }
-        GeneralCategory::LetterNumber => (Category::Number, Subcategory::None),
-        GeneralCategory::SpaceSeparator => (Category::Separator, Subcategory::Space),
+        GeneralCategory::LetterNumber => (Category::Number, None),
+        GeneralCategory::SpaceSeparator => (Category::Separator, Some(Subcategory::Space)),
         GeneralCategory::LineSeparator
         | GeneralCategory::ParagraphSeparator
-        | GeneralCategory::Control => (Category::Separator, Subcategory::None),
-        GeneralCategory::Format => (Category::Separator, Subcategory::Format),
-        GeneralCategory::PrivateUse => (Category::Letter, Subcategory::Compatibility),
-        GeneralCategory::DashPunctuation => (Category::Punctuation, Subcategory::Dash),
+        | GeneralCategory::Control => (Category::Separator, None),
+        GeneralCategory::Format => (Category::Separator, Some(Subcategory::Format)),
+        GeneralCategory::PrivateUse => (Category::Letter, Some(Subcategory::Compatibility)),
+        GeneralCategory::DashPunctuation => (Category::Punctuation, Some(Subcategory::Dash)),
         GeneralCategory::OpenPunctuation | GeneralCategory::ClosePunctuation => {
-            (Category::Punctuation, Subcategory::Parenthesis)
+            (Category::Punctuation, Some(Subcategory::Parenthesis))
         }
         GeneralCategory::ConnectorPunctuation | GeneralCategory::OtherPunctuation => {
-            (Category::Punctuation, Subcategory::None)
+            (Category::Punctuation, None)
         }
         GeneralCategory::InitialPunctuation | GeneralCategory::FinalPunctuation => {
-            (Category::Punctuation, Subcategory::Quote)
+            (Category::Punctuation, Some(Subcategory::Quote))
         }
-        GeneralCategory::MathSymbol => (Category::Symbol, Subcategory::Math),
-        GeneralCategory::CurrencySymbol => (Category::Symbol, Subcategory::Currency),
-        GeneralCategory::ModifierSymbol => (Category::Mark, Subcategory::Spacing),
+        GeneralCategory::MathSymbol => (Category::Symbol, Some(Subcategory::Math)),
+        GeneralCategory::CurrencySymbol => (Category::Symbol, Some(Subcategory::Currency)),
+        GeneralCategory::ModifierSymbol => (Category::Mark, Some(Subcategory::Spacing)),
         GeneralCategory::Surrogate => unreachable!("char cannot represent surrogate code points"),
     }
 }
 
-fn load_bundled_data() -> Vec<GlyphInfo> {
-    bincode::deserialize(BUNDLED_DATA).unwrap()
-}
-
-fn merge_data(mut base: Vec<GlyphInfo>, overrides: Vec<GlyphInfo>) -> Vec<GlyphInfo> {
-    let skip_names = overrides
-        .iter()
-        .map(|info| &info.name)
-        .collect::<HashSet<_>>();
-    base.retain(|info| !skip_names.contains(&info.name));
-    base.extend(overrides);
-    base
-}
-
 #[cfg(test)]
 mod tests {
-    use std::sync::OnceLock;
 
     use super::*;
 
     #[test]
     fn test_bundled_data() {
-        let data = load_bundled_data();
-        assert_eq!(data.len(), 73329);
+        let data = GlyphData::glyphs_lib_data().data;
+        assert!(data.len() > 70000, "{}", data.len());
     }
 
     #[test]
     fn simple_overrides() {
-        let overrides = vec![GlyphInfo {
-            name: "A".into(),
-            category: Category::Mark,
-            subcategory: Subcategory::SpacingCombining,
-            unicode: Some(b'A' as u32),
-            production: None,
-            alt_names: Default::default(),
-        }];
-        let bundled = load_bundled_data();
-        let merged = merge_data(bundled, overrides);
-        let data = GlyphData::new_impl(merged);
-
-        assert_eq!(data.get_by_name("A").unwrap().category, Category::Mark);
+        let overrides = HashMap::from([(
+            "A".into(),
+            GlyphOverride {
+                category: Category::Mark,
+                subcategory: Some(Subcategory::SpacingCombining),
+                codepoint: Some(b'A' as u32),
+            },
+        )]);
+        let data = GlyphData::with_overrides(overrides).unwrap();
+
+        assert_eq!(data.query("A", None).unwrap().0, Category::Mark);
     }
 
     #[test]
     fn overrides_from_file() {
-        let data = GlyphData::new(Some(Path::new("./data/GlyphData_override_test.xml"))).unwrap();
-        assert_eq!(data.get_by_name("zero").unwrap().category, Category::Other);
-        assert_eq!(data.get_by_name("C").unwrap().category, Category::Number);
-        assert_eq!(
-            data.get_by_name("Yogh").unwrap().production,
-            Some("Yolo".into())
-        );
+        let data =
+            GlyphData::with_override_file(Path::new("./data/GlyphData_override_test.xml")).unwrap();
+        assert_eq!(data.query("zero", None).unwrap().0, Category::Other);
+        assert_eq!(data.query("C", None).unwrap().0, Category::Number);
     }
 
-    fn get_category(name: &str, codepoints: &[u32]) -> Option<(Category, Subcategory)> {
-        static GLYPH_DATA: OnceLock<GlyphData> = OnceLock::new();
-        let data = GLYPH_DATA.get_or_init(|| GlyphData::new(None).unwrap());
+    fn get_category(name: &str, codepoints: &[u32]) -> Option<(Category, Option<Subcategory>)> {
         let codepoints = codepoints.iter().copied().collect();
-        data.get_glyph(name, Some(&codepoints))
-            .map(|info| (info.category, info.subcategory))
+        GlyphData::glyphs_lib_data()
+            .query(name, Some(&codepoints))
+            .map(|(cat, sub, _)| (cat, sub))
     }
 
     // from python glyphsLib: https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d5/tests/glyphdata_test.py#L106
     #[test]
     fn py_test_category() {
         for (name, expected) in [
-            (".notdef", Some((Category::Separator, Subcategory::None))),
+            (".notdef", Some((Category::Separator, None))),
             // this test case requires AGL lookup:
-            ("uni000D", Some((Category::Separator, Subcategory::None))),
+            ("uni000D", Some((Category::Separator, None))),
             (
                 "boxHeavyUp",
-                Some((Category::Symbol, Subcategory::Geometry)),
+                Some((Category::Symbol, Some(Subcategory::Geometry))),
+            ),
+            ("eacute", Some((Category::Letter, None))),
+            ("Abreveacute", Some((Category::Letter, None))),
+            ("C-fraktur", Some((Category::Letter, None))),
+            ("fi", Some((Category::Letter, Some(Subcategory::Ligature)))),
+            (
+                "fi.alt",
+                Some((Category::Letter, Some(Subcategory::Ligature))),
+            ),
+            (
+                "hib-ko",
+                Some((Category::Letter, Some(Subcategory::Syllable))),
             ),
-            ("eacute", Some((Category::Letter, Subcategory::None))),
-            ("Abreveacute", Some((Category::Letter, Subcategory::None))),
-            ("C-fraktur", Some((Category::Letter, Subcategory::None))),
-            ("fi", Some((Category::Letter, Subcategory::Ligature))),
-            ("fi.alt", Some((Category::Letter, Subcategory::Ligature))),
-            ("hib-ko", Some((Category::Letter, Subcategory::Syllable))),
             (
                 "one.foo",
-                Some((Category::Number, Subcategory::DecimalDigit)),
+                Some((Category::Number, Some(Subcategory::DecimalDigit))),
             ),
             (
                 "one_two.foo",
-                Some((Category::Number, Subcategory::Ligature)),
+                Some((Category::Number, Some(Subcategory::Ligature))),
+            ),
+            (
+                "o_f_f_i",
+                Some((Category::Letter, Some(Subcategory::Ligature))),
             ),
-            ("o_f_f_i", Some((Category::Letter, Subcategory::Ligature))),
             (
                 "o_f_f_i.foo",
-                Some((Category::Letter, Subcategory::Ligature)),
+                Some((Category::Letter, Some(Subcategory::Ligature))),
             ),
             (
                 "ain_alefMaksura-ar.fina",
-                Some((Category::Letter, Subcategory::Ligature)),
+                Some((Category::Letter, Some(Subcategory::Ligature))),
+            ),
+            (
+                "brevecomb",
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
-            ("brevecomb", Some((Category::Mark, Subcategory::Nonspacing))),
             (
                 "brevecomb.case",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "brevecomb_acutecomb",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "brevecomb_acutecomb.case",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "caroncomb_dotaccentcomb",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "dieresiscomb_caroncomb",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "dieresiscomb_macroncomb",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "dotaccentcomb_macroncomb",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "macroncomb_dieresiscomb",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "dotaccentcomb_o",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "macronlowmod_O",
-                Some((Category::Mark, Subcategory::Modifier)),
+                Some((Category::Mark, Some(Subcategory::Modifier))),
             ),
-            ("O_o", Some((Category::Letter, Subcategory::Ligature))),
+            ("O_o", Some((Category::Letter, Some(Subcategory::Ligature)))),
             (
                 "O_dotaccentcomb_o",
-                Some((Category::Letter, Subcategory::Ligature)),
+                Some((Category::Letter, Some(Subcategory::Ligature))),
             ),
+            ("O_dotaccentcomb", Some((Category::Letter, None))),
             (
-                "O_dotaccentcomb",
-                Some((Category::Letter, Subcategory::None)),
+                "O_period",
+                Some((Category::Letter, Some(Subcategory::Ligature))),
             ),
-            ("O_period", Some((Category::Letter, Subcategory::Ligature))),
-            ("O_nbspace", Some((Category::Letter, Subcategory::None))),
+            ("O_nbspace", Some((Category::Letter, None))),
             ("_a", None),
             ("_aaa", None),
             (
                 "dal_alef-ar",
-                Some((Category::Letter, Subcategory::Ligature)),
+                Some((Category::Letter, Some(Subcategory::Ligature))),
             ),
             (
                 "dal_lam-ar.dlig",
-                Some((Category::Letter, Subcategory::Ligature)),
+                Some((Category::Letter, Some(Subcategory::Ligature))),
             ),
-            ("po-khmer", Some((Category::Letter, Subcategory::None))),
+            ("po-khmer", Some((Category::Letter, None))),
             (
                 "po-khmer.below",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
             (
                 "po-khmer.below.ro",
-                Some((Category::Mark, Subcategory::Nonspacing)),
+                Some((Category::Mark, Some(Subcategory::Nonspacing))),
             ),
         ] {
             let result = get_category(name, &[]);
@@ -486,9 +711,13 @@ mod tests {
     // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L145C5-L153C76
     #[test]
     fn py_category_by_unicode() {
-        //# "SignU.bn" is a non-standard name not defined in GlyphData.xml
+        // "SignU.bn" is a non-standard name not defined in GlyphData.xml
+        // so the lookup should fall back to matching by codepoint 0x09C1
         let result = get_category("SignU.bn", &[0x09C1]);
-        assert_eq!(result, Some((Category::Mark, Subcategory::Nonspacing)))
+        assert_eq!(
+            result,
+            Some((Category::Mark, Some(Subcategory::Nonspacing)))
+        )
     }
 
     // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L155C5-L162C1
@@ -496,8 +725,8 @@ mod tests {
     #[test]
     fn py_bug_232() {
         let u = get_category("uni07F0", &[]);
-        assert_eq!(u, Some((Category::Mark, Subcategory::Nonspacing)));
+        assert_eq!(u, Some((Category::Mark, Some(Subcategory::Nonspacing))));
         let g = get_category("longlowtonecomb-nko", &[]);
-        assert_eq!(g, Some((Category::Mark, Subcategory::Nonspacing)));
+        assert_eq!(g, Some((Category::Mark, Some(Subcategory::Nonspacing))));
     }
 }
diff --git a/glyphs-reader/src/glyphdata/glyphdata_impl.rs b/glyphs-reader/src/glyphdata/glyphdata_impl.rs
deleted file mode 100644
index a12ca7c8f..000000000
--- a/glyphs-reader/src/glyphdata/glyphdata_impl.rs
+++ /dev/null
@@ -1,332 +0,0 @@
-// NOTE: to avoid a bunch of duplication, this file is also `include!`ed from
-// build.rs.
-
-use std::{fmt::Display, num::ParseIntError, path::PathBuf, str::FromStr};
-
-use quick_xml::{
-    events::{BytesStart, Event},
-    Reader,
-};
-use serde::{Deserialize, Serialize};
-use smol_str::SmolStr;
-
-/// Information about a glyph
-///
-/// In general this is derived from bundled data files, but these fields can
-/// also be overridden by the font author
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
-pub struct GlyphInfo {
-    pub name: SmolStr,
-    pub category: Category,
-    pub subcategory: Subcategory,
-    pub unicode: Option<u32>,
-    pub production: Option<SmolStr>,
-    pub alt_names: Vec<SmolStr>,
-}
-
-/// The primary category for a given glyph
-///
-/// These categories are not the same as the unicode character categories.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
-#[repr(u8)]
-pub enum Category {
-    Mark,
-    Space,
-    Separator,
-    Letter,
-    Number,
-    Symbol,
-    Punctuation,
-    Other,
-}
-
-/// The subcategory of a given glyph
-#[derive(
-    Clone, Copy, Default, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize,
-)]
-#[repr(u8)]
-pub enum Subcategory {
-    Spacing,
-    Radical,
-    Math,
-    Superscript,
-    Geometry,
-    Dash,
-    DecimalDigit,
-    Currency,
-    Fraction,
-    Halfform,
-    Small,
-    Number,
-    Quote,
-    Space,
-    Letter,
-    Jamo,
-    Format,
-    Parenthesis,
-    Matra,
-    Arrow,
-    Nonspacing,
-    Compatibility,
-    Syllable,
-    Ligature,
-    Modifier,
-    SpacingCombining,
-    Emoji,
-    Enclosing,
-    #[default]
-    None,
-}
-
-/// Parse glyph info entries out of a GlyphData xml file.
-pub fn parse_entries(xml: &[u8]) -> Result<Vec<GlyphInfo>, GlyphDataError> {
-    fn check_and_advance_past_preamble(reader: &mut Reader<&[u8]>) -> Result<(), GlyphDataError> {
-        loop {
-            let event = reader.read_event()?;
-            match event {
-                Event::Comment(_) => (),
-                Event::Decl(_) => (),
-                Event::DocType(_) => (),
-                Event::Start(start) if start.name().as_ref() == b"glyphData" => return Ok(()),
-                _other => {
-                    return Err(GlyphDataError::WrongFirstElement);
-                }
-            }
-        }
-    }
-
-    let mut reader = Reader::from_reader(xml);
-    reader.config_mut().trim_text(true);
-
-    check_and_advance_past_preamble(&mut reader)?;
-    iter_rows(&mut reader)
-        .map(|row| row.map_err(Into::into).and_then(parse_glyph_xml))
-        .collect::<Result<_, _>>()
-}
-
-fn iter_rows<'a, 'b: 'a>(
-    reader: &'b mut Reader<&'a [u8]>,
-) -> impl Iterator<Item = Result<BytesStart<'a>, quick_xml::Error>> + 'a {
-    std::iter::from_fn(|| match reader.read_event() {
-        Err(e) => Some(Err(e)),
-        Ok(Event::Empty(start)) => Some(Ok(start)),
-        _ => None,
-    })
-}
-
-fn parse_glyph_xml(item: BytesStart) -> Result<GlyphInfo, GlyphDataError> {
-    let mut name = None;
-    let mut category = None;
-    let mut subcategory = None;
-    let mut unicode = None;
-    let mut production = None;
-    let mut alt_names = None;
-
-    for attr in item.attributes() {
-        let attr = attr?;
-        let value = attr.unescape_value()?;
-        match attr.key.as_ref() {
-            b"name" => name = Some(value),
-            b"category" => category = Some(value),
-            b"subCategory" => subcategory = Some(value),
-            b"unicode" => unicode = Some(value),
-            b"production" => production = Some(value),
-            b"altNames" => alt_names = Some(value),
-            b"unicodeLegacy" | b"case" | b"direction" | b"script" | b"description" => (),
-            other => {
-                return Err(GlyphDataError::UnknownAttribute(
-                    String::from_utf8_lossy(other).into_owned(),
-                ))
-            }
-        }
-    }
-
-    // now we've found some values, let's finalize them
-
-    let name = name
-        .map(SmolStr::new)
-        .ok_or_else(|| GlyphDataError::missing_attr("name", item.attributes_raw()))?;
-    let category = category
-        .ok_or_else(|| GlyphDataError::missing_attr("category", item.attributes_raw()))
-        .and_then(|cat| {
-            Category::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidCategory)
-        })?;
-    let subcategory = subcategory
-        .map(|cat| Subcategory::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidSubcategory))
-        .transpose()?
-        .unwrap_or(Subcategory::None);
-    let production = production.map(SmolStr::new);
-    let unicode = unicode
-        .map(|s| {
-            u32::from_str_radix(&s, 16).map_err(|inner| GlyphDataError::InvalidUnicode {
-                raw: s.into_owned(),
-                inner,
-            })
-        })
-        .transpose()?;
-    let alt_names = alt_names
-        .map(|names| {
-            names
-                .as_ref()
-                .split(',')
-                .map(|name| SmolStr::from(name.trim()))
-                .collect()
-        })
-        .unwrap_or_default();
-
-    Ok(GlyphInfo {
-        name,
-        category,
-        subcategory,
-        unicode,
-        production,
-        alt_names,
-    })
-}
-
-impl FromStr for Category {
-    type Err = SmolStr;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "Mark" => Ok(Self::Mark),
-            "Space" => Ok(Self::Space),
-            "Separator" => Ok(Self::Separator),
-            "Letter" => Ok(Self::Letter),
-            "Number" => Ok(Self::Number),
-            "Symbol" => Ok(Self::Symbol),
-            "Punctuation" => Ok(Self::Punctuation),
-            "Other" => Ok(Self::Other),
-            _ => Err(s.into()),
-        }
-    }
-}
-
-impl FromStr for Subcategory {
-    type Err = SmolStr;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "Spacing" => Ok(Self::Spacing),
-            "Radical" => Ok(Self::Radical),
-            "Math" => Ok(Self::Math),
-            "Superscript" => Ok(Self::Superscript),
-            "Geometry" => Ok(Self::Geometry),
-            "Dash" => Ok(Self::Dash),
-            "Decimal Digit" => Ok(Self::DecimalDigit),
-            "Currency" => Ok(Self::Currency),
-            "Fraction" => Ok(Self::Fraction),
-            "Halfform" => Ok(Self::Halfform),
-            "Small" => Ok(Self::Small),
-            "Number" => Ok(Self::Number),
-            "Quote" => Ok(Self::Quote),
-            "Space" => Ok(Self::Space),
-            "Letter" => Ok(Self::Letter),
-            "Jamo" => Ok(Self::Jamo),
-            "Format" => Ok(Self::Format),
-            "Parenthesis" => Ok(Self::Parenthesis),
-            "Matra" => Ok(Self::Matra),
-            "Arrow" => Ok(Self::Arrow),
-            "Nonspacing" => Ok(Self::Nonspacing),
-            "Compatibility" => Ok(Self::Compatibility),
-            "Syllable" => Ok(Self::Syllable),
-            "Ligature" => Ok(Self::Ligature),
-            "Modifier" => Ok(Self::Modifier),
-            "Spacing Combining" => Ok(Self::SpacingCombining),
-            "Emoji" => Ok(Self::Emoji),
-            "Enclosing" => Ok(Self::Enclosing),
-            "None" => Ok(Self::None),
-            _ => Err(s.into()),
-        }
-    }
-}
-
-impl Display for Category {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Mark => write!(f, "Mark"),
-            Self::Space => write!(f, "Space"),
-            Self::Separator => write!(f, "Separator"),
-            Self::Letter => write!(f, "Letter"),
-            Self::Number => write!(f, "Number"),
-            Self::Symbol => write!(f, "Symbol"),
-            Self::Punctuation => write!(f, "Punctuation"),
-            Self::Other => write!(f, "Other"),
-        }
-    }
-}
-
-impl Display for Subcategory {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Spacing => write!(f, "Spacing"),
-            Self::Radical => write!(f, "Radical"),
-            Self::Math => write!(f, "Math"),
-            Self::Superscript => write!(f, "Superscript"),
-            Self::Geometry => write!(f, "Geometry"),
-            Self::Dash => write!(f, "Dash"),
-            Self::DecimalDigit => write!(f, "Decimal Digit"),
-            Self::Currency => write!(f, "Currency"),
-            Self::Fraction => write!(f, "Fraction"),
-            Self::Halfform => write!(f, "Halfform"),
-            Self::Small => write!(f, "Small"),
-            Self::Number => write!(f, "Number"),
-            Self::Quote => write!(f, "Quote"),
-            Self::Space => write!(f, "Space"),
-            Self::Letter => write!(f, "Letter"),
-            Self::Jamo => write!(f, "Jamo"),
-            Self::Format => write!(f, "Format"),
-            Self::Parenthesis => write!(f, "Parenthesis"),
-            Self::Matra => write!(f, "Matra"),
-            Self::Arrow => write!(f, "Arrow"),
-            Self::Nonspacing => write!(f, "Nonspacing"),
-            Self::Compatibility => write!(f, "Compatibility"),
-            Self::Syllable => write!(f, "Syllable"),
-            Self::Ligature => write!(f, "Ligature"),
-            Self::Modifier => write!(f, "Modifier"),
-            Self::SpacingCombining => write!(f, "Spacing Combining"),
-            Self::Emoji => write!(f, "Emoji"),
-            Self::Enclosing => write!(f, "Enclosing"),
-            Self::None => write!(f, "None"),
-        }
-    }
-}
-
-#[derive(Clone, Debug, thiserror::Error)]
-pub enum GlyphDataError {
-    #[error("Couldn't read user file at '{path}': '{reason}'")]
-    UserFile {
-        path: PathBuf,
-        reason: std::io::ErrorKind,
-    },
-    #[error("Error parsing XML: '{0}'")]
-    ReaderError(#[from] quick_xml::Error),
-    #[error("Error parsing XML attribute: '{0}'")]
-    XmlAttributeError(#[from] quick_xml::events::attributes::AttrError),
-    #[error("Unknown category '{0}'")]
-    InvalidCategory(SmolStr),
-    #[error("Unknown subcategory '{0}'")]
-    InvalidSubcategory(SmolStr),
-    #[error("the XML input did not start with a <glyphdata> tag")]
-    WrongFirstElement,
-    #[error("Missing required attribute '{missing}' in '{attributes}'")]
-    MissingRequiredAttribute {
-        attributes: String,
-        missing: &'static str,
-    },
-    #[error("Invalid unicode value '{raw}': '{inner}'")]
-    InvalidUnicode { raw: String, inner: ParseIntError },
-    #[error("Unexpected attribute '{0}'")]
-    UnknownAttribute(String),
-}
-
-impl GlyphDataError {
-    // a little helper here makes our parsing code cleaner
-    fn missing_attr(name: &'static str, raw_attrs: &[u8]) -> Self {
-        let attributes = String::from_utf8_lossy(raw_attrs).into_owned();
-        Self::MissingRequiredAttribute {
-            attributes,
-            missing: name,
-        }
-    }
-}
diff --git a/glyphs-reader/src/lib.rs b/glyphs-reader/src/lib.rs
index 3573848d7..c57f620b1 100644
--- a/glyphs-reader/src/lib.rs
+++ b/glyphs-reader/src/lib.rs
@@ -3,6 +3,7 @@
 pub mod error;
 mod font;
 pub mod glyphdata;
+mod glyphslib_data;
 mod plist;
 mod propagate_anchors;
 
@@ -10,4 +11,5 @@ pub use font::{
     Axis, Component, FeatureSnippet, Font, FontMaster, Glyph, InstanceType, Layer, Node, NodeType,
     Path, Shape,
 };
+pub use glyphslib_data::{Category, Subcategory};
 pub use plist::Plist;
diff --git a/glyphs-reader/src/propagate_anchors.rs b/glyphs-reader/src/propagate_anchors.rs
index 918d14555..bbed4db98 100644
--- a/glyphs-reader/src/propagate_anchors.rs
+++ b/glyphs-reader/src/propagate_anchors.rs
@@ -11,11 +11,7 @@ use indexmap::IndexMap;
 use kurbo::{Affine, Vec2};
 use smol_str::{format_smolstr, SmolStr};
 
-use crate::{
-    font::Anchor,
-    glyphdata::{Category, Subcategory},
-    Component, Font, Glyph, Layer, Shape,
-};
+use crate::{font::Anchor, Category, Component, Font, Glyph, Layer, Shape, Subcategory};
 
 impl Font {
     /// Copy anchors from component glyphs into their including composites
@@ -110,7 +106,7 @@ fn anchors_traversing_components<'a>(
         return origin_adjusted_anchors(&layer.anchors).collect();
     }
 
-    let is_ligature = glyph.sub_category == Subcategory::Ligature;
+    let is_ligature = glyph.sub_category == Some(Subcategory::Ligature);
     let mut has_underscore = layer
         .anchors
         .iter()
@@ -446,6 +442,8 @@ fn depth_sorted_composite_glyphs(glyphs: &BTreeMap<SmolStr, Glyph>) -> Vec<SmolS
 #[cfg(test)]
 mod tests {
 
+    use std::collections::BTreeSet;
+
     use kurbo::Point;
 
     use crate::{glyphdata::GlyphData, Layer, Shape};
@@ -477,16 +475,22 @@ mod tests {
 
     impl GlyphBuilder {
         fn new(name: &str) -> Self {
-            let info = GlyphData::bundled().get_glyph(name, None);
             let mut this = GlyphBuilder(Glyph {
                 name: name.into(),
                 export: true,
-                category: info.as_ref().map(|i| i.category),
-                sub_category: info.as_ref().map(|i| i.subcategory).unwrap_or_default(),
-
-                unicode: info.and_then(|i| i.unicode).into_iter().collect(),
                 ..Default::default()
             });
+            if let Some((category, sub_category, unicode)) =
+                GlyphData::glyphs_lib_data().query(name, None)
+            {
+                this.set_category(category);
+                if let Some(sub_category) = sub_category {
+                    this.set_subcategory(sub_category);
+                }
+                if let Some(unicode) = unicode {
+                    this.set_unicode(unicode);
+                }
+            }
             this.add_layer();
             this
         }
@@ -508,14 +512,18 @@ mod tests {
             self.0.layers.last_mut().unwrap()
         }
 
-        #[allow(dead_code)]
+        fn set_unicode(&mut self, unicode: u32) -> &mut Self {
+            self.0.unicode = BTreeSet::from([unicode]);
+            self
+        }
+
         fn set_category(&mut self, category: Category) -> &mut Self {
             self.0.category = Some(category);
             self
         }
 
         fn set_subcategory(&mut self, sub_category: Subcategory) -> &mut Self {
-            self.0.sub_category = sub_category;
+            self.0.sub_category = Some(sub_category);
             self
         }
 
diff --git a/glyphs2fontir/src/source.rs b/glyphs2fontir/src/source.rs
index 62031118c..4e0d718c2 100644
--- a/glyphs2fontir/src/source.rs
+++ b/glyphs2fontir/src/source.rs
@@ -25,10 +25,7 @@ use fontir::{
     source::{Input, Source},
     stateset::StateSet,
 };
-use glyphs_reader::{
-    glyphdata::{Category, Subcategory},
-    Font, InstanceType,
-};
+use glyphs_reader::{Category, Font, InstanceType, Subcategory};
 use ordered_float::OrderedFloat;
 use smol_str::SmolStr;
 use write_fonts::{
@@ -548,10 +545,11 @@ fn category_for_glyph(glyph: &glyphs_reader::Glyph) -> Option<GlyphClassDef> {
         // 'attaching anchor'; see https://github.com/googlefonts/glyphsLib/issues/1024
         .any(|anchor| !anchor.name.starts_with('_'));
     match (glyph.category, glyph.sub_category) {
-        (_, Subcategory::Ligature) if has_attaching_anchor => Some(GlyphClassDef::Ligature),
-        (Some(Category::Mark), Subcategory::Nonspacing | Subcategory::SpacingCombining) => {
-            Some(GlyphClassDef::Mark)
-        }
+        (_, Some(Subcategory::Ligature)) if has_attaching_anchor => Some(GlyphClassDef::Ligature),
+        (
+            Some(Category::Mark),
+            Some(Subcategory::Nonspacing) | Some(Subcategory::SpacingCombining),
+        ) => Some(GlyphClassDef::Mark),
         _ if has_attaching_anchor => Some(GlyphClassDef::Base),
         _ => None,
     }
@@ -1132,7 +1130,7 @@ mod tests {
         source::Source,
         stateset::StateSet,
     };
-    use glyphs_reader::{glyphdata::Category, Font};
+    use glyphs_reader::{Category, Font};
     use indexmap::IndexSet;
     use ir::{test_helpers::Round2, Panose};
     use write_fonts::types::{NameId, Tag};