[read-fonts] tt bytecode decoder

Adds an opcode definition, instruction representation and decoder for TrueType bytecode. Also renames Args -> InlineOperands and moves the type from skrifa to read-fonts.
googlefonts · Feb 7, 2024 · 9839450 · 9839450
1 parent 74cb7c1
commit 9839450
Show file tree

Hide file tree

Showing 11 changed files with 1,252 additions and 123 deletions.
diff --git a/read-fonts/src/tables.rs b/read-fonts/src/tables.rs
@@ -32,6 +32,7 @@ pub mod post;
 pub mod postscript;
 pub mod sbix;
 pub mod stat;
+pub mod truetype;
 pub mod variations;
 pub mod vhea;
 pub mod vmtx;

diff --git a/read-fonts/src/tables/truetype.rs b/read-fonts/src/tables/truetype.rs
@@ -0,0 +1,3 @@
+//! TrueType (glyf) common code.
+
+pub mod bytecode;
diff --git a/read-fonts/src/tables/truetype/bytecode.rs b/read-fonts/src/tables/truetype/bytecode.rs
@@ -0,0 +1,14 @@
+//! TrueType hinting bytecode.
+
+mod decode;
+mod instruction;
+mod opcode;
+
+pub use decode::{decode_all, DecodeError, Decoder};
+pub use instruction::{InlineOperands, Instruction};
+pub use opcode::Opcode;
+
+// Exported publicly for use by skrifa when the scaler_test feature is
+// enabled.
+#[cfg(any(test, feature = "scaler_test"))]
+pub use instruction::MockInlineOperands;
diff --git a/read-fonts/src/tables/truetype/bytecode/decode.rs b/read-fonts/src/tables/truetype/bytecode/decode.rs
@@ -0,0 +1,237 @@
+//! TrueType bytecode decoder.
+
+use super::{InlineOperands, Instruction, Opcode};
+
+/// Error produced by [`Decoder::decode`] when the bytecode stream ends
+/// before an instruction has been read in full.
+#[derive(Copy, Clone, Debug)]
+pub struct DecodeError(());
+
+impl std::fmt::Display for DecodeError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // The error carries no payload, so the message is always the same.
+        const MESSAGE: &str = "unexpected end of bytecode";
+        f.write_str(MESSAGE)
+    }
+}
+
+/// Sequential reader that turns TrueType bytecode into [`Instruction`]s.
+#[derive(Copy, Clone)]
+pub struct Decoder<'a> {
+    /// Raw bytes of the program being decoded.
+    pub bytecode: &'a [u8],
+    /// Offset into `bytecode` of the next instruction to decode (the
+    /// "program counter").
+    pub pc: usize,
+}
+
+impl<'a> Decoder<'a> {
+    /// Builds a decoder that reads `bytecode` starting at offset `pc`.
+    pub fn new(bytecode: &'a [u8], pc: usize) -> Self {
+        Decoder { bytecode, pc }
+    }
+
+    /// Decodes the instruction at the current program counter and moves the
+    /// program counter past it.
+    ///
+    /// Returns `None` when the program counter is at or beyond the end of
+    /// the bytecode. Returns `Some(Err(_))` when the stream ends in the
+    /// middle of an instruction; the program counter is left unchanged in
+    /// that case.
+    pub fn decode(&mut self) -> Option<Result<Instruction<'a>, DecodeError>> {
+        let first_byte = *self.bytecode.get(self.pc)?;
+        Some(self.decode_inner(Opcode::from_byte(first_byte)))
+    }
+
+    /// Reads the inline operands of `opcode`, which is located at the
+    /// current program counter, and advances past the whole instruction.
+    fn decode_inner(&mut self, opcode: Opcode) -> Result<Instruction<'a>, DecodeError> {
+        let pc = self.pc;
+        let base_len = opcode.len();
+        // A negative length marks a counted push: the byte following the
+        // opcode holds the number of inline operands and |base_len| is the
+        // size of each operand.
+        // <https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/truetype/ttinterp.c#L7046>
+        let (instruction_len, count_len) = if base_len < 0 {
+            let operand_count = *self.bytecode.get(pc + 1).ok_or(DecodeError(()))?;
+            (-base_len * operand_count as i32 + 2, 1)
+        } else {
+            (base_len, 0)
+        };
+        let next_pc = pc + instruction_len as usize;
+        // Operands begin after the opcode and the optional count byte.
+        let inline_start = pc + 1 + count_len;
+        let inline_size = next_pc - inline_start;
+        let inline_operands = if inline_size > 0 {
+            InlineOperands {
+                bytes: self
+                    .bytecode
+                    .get(inline_start..next_pc)
+                    .ok_or(DecodeError(()))?,
+                is_words: opcode.is_push_words(),
+            }
+        } else {
+            InlineOperands::default()
+        };
+        // Only commit the new program counter once decoding has succeeded.
+        self.pc = next_pc;
+        Ok(Instruction {
+            opcode,
+            inline_operands,
+            pc,
+        })
+    }
+}
+
+/// Returns an iterator over every instruction in `bytecode`, beginning at
+/// offset `pc`.
+///
+/// Each item is the result of one [`Decoder::decode`] call; iteration ends
+/// when that call returns `None`.
+pub fn decode_all(
+    bytecode: &[u8],
+    pc: usize,
+) -> impl Iterator<Item = Result<Instruction<'_>, DecodeError>> + '_ + Clone {
+    // The decoder is moved into the closure, so the iterator owns its state.
+    let mut state = Decoder { bytecode, pc };
+    let next_instruction = move || state.decode();
+    std::iter::from_fn(next_instruction)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Opcode;
+
+    /// Encodes a mix of push and non-push instructions with the test
+    /// encoder and checks that the decoder reproduces every opcode and
+    /// operand list.
+    #[test]
+    fn mixed_ops() {
+        let mut enc = Encoder::default();
+        // intermix push and non-push ops of various sizes to test boundary
+        // conditions
+        let cases: &[(Opcode, &[i16])] = &[
+            (Opcode::PUSHB100, &[1, 2, 3, 255, 5]),
+            (Opcode::PUSHW010, &[-1, 4508, -3]),
+            (Opcode::IUP0, &[]),
+            (Opcode::NPUSHB, &[55; 255]),
+            (Opcode::MDRP00110, &[]),
+            (Opcode::NPUSHW, &[i16::MIN; 32]),
+            (Opcode::LOOPCALL, &[]),
+            (Opcode::FLIPOFF, &[]),
+            (
+                Opcode::PUSHW011,
+                &[i16::MIN, i16::MIN / 2, i16::MAX, i16::MAX / 2],
+            ),
+            (Opcode::GETVARIATION, &[]),
+        ];
+        for (opcode, values) in cases {
+            // Push cases are encoded from their values alone; the encoder
+            // picks the opcode, which the assertions below then verify.
+            if !values.is_empty() {
+                enc.encode_push(values);
+            } else {
+                enc.encode(*opcode);
+            }
+        }
+        let all_ins = super::decode_all(&enc.0, 0)
+            .map(|ins| ins.unwrap())
+            .collect::<Vec<_>>();
+        // `zip` stops at the shorter sequence, so without this check a
+        // decoder that yields too few or too many instructions would pass.
+        assert_eq!(all_ins.len(), cases.len());
+        for (ins, (expected_opcode, expected_values)) in all_ins.iter().zip(cases) {
+            assert_eq!(ins.opcode, *expected_opcode);
+            let values = ins
+                .inline_operands
+                .values()
+                .map(|v| v as i16)
+                .collect::<Vec<_>>();
+            assert_eq!(&values, expected_values);
+        }
+    }
+
+    /// Every opcode that is not a push must decode as a one-byte
+    /// instruction and map back to the byte it came from.
+    #[test]
+    fn non_push_ops() {
+        let single_byte_ops: Vec<u8> = (u8::MIN..=u8::MAX)
+            .filter(|&byte| !Opcode::from_byte(byte).is_push())
+            .collect();
+        let mut round_tripped = Vec::with_capacity(single_byte_ops.len());
+        for ins in super::decode_all(&single_byte_ops, 0) {
+            round_tripped.push(ins.unwrap().opcode as u8);
+        }
+        assert_eq!(single_byte_ops, round_tripped);
+    }
+
+    /// Decodes a glyph program taken from a real font and compares the
+    /// textual form of each instruction with the expected listing.
+    #[test]
+    fn real_bytecode() {
+        // taken from NotoSerif-Regular, glyph Rturnedsmall, gid 1272
+        let program: &[u8] = &[
+            181, 5, 1, 9, 3, 1, 76, 75, 176, 45, 80, 88, 64, 35, 0, 3, 0, 9, 7, 3, 9, 105, 6, 4, 2,
+            1, 1, 2, 97, 5, 1, 2, 2, 109, 77, 11, 8, 2, 7, 7, 0, 95, 10, 1, 0, 0, 107, 0, 78, 27,
+            64, 41, 0, 7, 8, 0, 8, 7, 114, 0, 3, 0, 9, 8, 3, 9, 105, 6, 4, 2, 1, 1, 2, 97, 5, 1, 2,
+            2, 109, 77, 11, 1, 8, 8, 0, 95, 10, 1, 0, 0, 107, 0, 78, 89, 64, 31, 37, 36, 1, 0, 40,
+            38, 36, 44, 37, 44, 34, 32, 27, 25, 24, 23, 22, 20, 17, 16, 12, 10, 9, 8, 0, 35, 1, 35,
+            12, 13, 22, 43,
+        ];
+        // One entry per instruction; the comment above each entry is the
+        // corresponding ttx assembly.
+        let expected_asm = [
+            // PUSHB[ ]	/* 6 values pushed */
+            // 5 1 9 3 1 76
+            "PUSHB[5] 5 1 9 3 1 76",
+            // MPPEM[ ]	/* MeasurePixelPerEm */
+            "MPPEM",
+            // PUSHB[ ]	/* 1 value pushed */
+            // 45
+            "PUSHB[0] 45",
+            // LT[ ]	/* LessThan */
+            "LT",
+            // IF[ ]	/* If */
+            "IF",
+            //   NPUSHB[ ]	/* 35 values pushed */
+            //   0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7
+            //   7 0 95 10 1 0 0 107 0 78
+            "NPUSHB 0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7 7 0 95 10 1 0 0 107 0 78",
+            // ELSE[ ]	/* Else */
+            "ELSE",
+            //   NPUSHB[ ]	/* 41 values pushed */
+            //   0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2
+            //   2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78
+            "NPUSHB 0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78",
+            // EIF[ ]	/* EndIf */
+            "EIF",
+            // NPUSHB[ ]	/* 31 values pushed */
+            // 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0
+            // 35 1 35 12 13 22
+            "NPUSHB 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0 35 1 35 12 13 22",
+            // CALL[ ]	/* CallFunction */
+            "CALL",
+        ];
+        let decoded_asm: Vec<String> = super::decode_all(program, 0)
+            .map(|ins| ins.unwrap().to_string())
+            .collect();
+        assert_eq!(decoded_asm, expected_asm);
+    }
+
+    /// Minimal bytecode writer used to build inputs for the decoder tests.
+    #[derive(Default)]
+    struct Encoder(Vec<u8>);
+
+    impl Encoder {
+        /// Appends a single-byte (non-push) instruction.
+        ///
+        /// Panics if `opcode` is a push opcode.
+        pub fn encode(&mut self, opcode: Opcode) {
+            assert!(!opcode.is_push(), "use the encode_push method instead");
+            let byte = opcode as u8;
+            self.0.push(byte);
+        }
+
+        /// Appends one push instruction carrying `values`.
+        ///
+        /// Nothing is written for an empty slice. The byte form is used when
+        /// every value lies in `0..=255`, otherwise the word form. Up to 8
+        /// values use the fixed-count opcodes; longer lists use
+        /// `NPUSHB`/`NPUSHW` followed by an explicit count byte.
+        ///
+        /// Panics if `values` holds more than 255 entries.
+        pub fn encode_push(&mut self, values: &[i16]) {
+            let count = values.len();
+            if count == 0 {
+                return;
+            }
+            assert!(
+                count < 256,
+                "too many values to push in a single instruction"
+            );
+            let all_bytes = values
+                .iter()
+                .all(|value| (0..=i16::from(u8::MAX)).contains(value));
+            let (fixed_base, counted) = if all_bytes {
+                (Opcode::PUSHB000, Opcode::NPUSHB)
+            } else {
+                (Opcode::PUSHW000, Opcode::NPUSHW)
+            };
+            if count <= 8 {
+                // The fixed-count opcode is the base opcode plus (count - 1).
+                let opcode = Opcode::from_byte(fixed_base as u8 + count as u8 - 1);
+                self.0.push(opcode as u8);
+            } else {
+                self.0.push(counted as u8);
+                self.0.push(count as u8);
+            }
+            if all_bytes {
+                self.0.extend(values.iter().map(|&value| value as u8));
+            } else {
+                // Words are stored high byte first.
+                self.0
+                    .extend(values.iter().flat_map(|&value| value.to_be_bytes()));
+            }
+        }
+    }
+}
diff --git a/read-fonts/src/tables/truetype/bytecode/instruction.rs b/read-fonts/src/tables/truetype/bytecode/instruction.rs
@@ -0,0 +1,124 @@
+//! Decoded representation of a TrueType instruction.
+use super::Opcode;
+
+/// A single decoded TrueType instruction.
+#[derive(Copy, Clone, Debug)]
+pub struct Instruction<'a> {
+    /// The operation this instruction performs.
+    pub opcode: Opcode,
+    /// Operands that were read from the bytecode together with the opcode.
+    pub inline_operands: InlineOperands<'a>,
+    /// Offset into the bytecode at which this instruction was decoded
+    /// (its program counter).
+    pub pc: usize,
+}
+
+impl std::fmt::Display for Instruction<'_> {
+    /// Writes the opcode name followed by each inline operand, separated
+    /// by single spaces.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.opcode.name())?;
+        self.inline_operands
+            .values()
+            .try_for_each(|operand| write!(f, " {operand}"))
+    }
+}
+
+/// Sequence of instruction operands that are encoded directly in the bytecode.
+///
+/// This is only used for push instructions.
+#[derive(Copy, Clone, Default, Debug)]
+pub struct InlineOperands<'a> {
+    /// Raw operand bytes, borrowed from the bytecode.
+    pub(super) bytes: &'a [u8],
+    /// When true, `bytes` holds big-endian 16-bit words instead of
+    /// individual bytes.
+    pub(super) is_words: bool,
+}
+
+impl<'a> InlineOperands<'a> {
+    /// Returns the number of operands.
+    ///
+    /// For word operands, a trailing unpaired byte is not counted.
+    #[inline]
+    pub fn len(&self) -> usize {
+        if self.is_words {
+            self.bytes.len() / 2
+        } else {
+            self.bytes.len()
+        }
+    }
+
+    /// Returns true if there are no operands.
+    ///
+    /// Defined in terms of [`len`](Self::len) so the two always agree,
+    /// including for a word payload that consists of a single dangling byte.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns an iterator over the operand values.
+    ///
+    /// Byte operands are zero-extended to `i32`. Word operands are read as
+    /// big-endian 16-bit values and sign-extended.
+    #[inline]
+    pub fn values(&self) -> impl Iterator<Item = i32> + 'a + Clone {
+        // At most one of the two slices is non-empty; chaining both lets the
+        // byte and word encodings share a single iterator type.
+        let (bytes, words) = if self.is_words {
+            (&[][..], self.bytes)
+        } else {
+            (self.bytes, &[][..])
+        };
+        bytes
+            .iter()
+            .map(|byte| i32::from(*byte))
+            .chain(words.chunks_exact(2).map(|chunk| {
+                // Going through i16 gives the required sign extension.
+                i32::from(i16::from_be_bytes([chunk[0], chunk[1]]))
+            }))
+    }
+}
+
+/// Owned stand-in for inline operands, used to build [`InlineOperands`]
+/// values in tests without going through the decoder.
+#[cfg(any(test, feature = "scaler_test"))]
+#[derive(Clone, Debug)]
+pub struct MockInlineOperands {
+    /// Encoded operand bytes, laid out as the decoder would see them.
+    bytes: Vec<u8>,
+    /// Whether `bytes` holds big-endian 16-bit words.
+    is_words: bool,
+}
+
+#[cfg(any(test, feature = "scaler_test"))]
+impl MockInlineOperands {
+    /// Creates byte operands; each input byte is one operand.
+    pub fn from_bytes(bytes: &[u8]) -> Self {
+        Self {
+            bytes: bytes.to_vec(),
+            is_words: false,
+        }
+    }
+
+    /// Creates word operands; each value is stored as two bytes, high
+    /// byte first.
+    pub fn from_words(words: &[i16]) -> Self {
+        Self {
+            // `to_be_bytes` yields a fixed-size array, so no temporary
+            // allocation is made for each word.
+            bytes: words.iter().flat_map(|word| word.to_be_bytes()).collect(),
+            is_words: true,
+        }
+    }
+
+    /// Returns a borrowed [`InlineOperands`] view over the stored bytes.
+    pub fn operands(&self) -> InlineOperands<'_> {
+        InlineOperands {
+            bytes: &self.bytes,
+            is_words: self.is_words,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::MockInlineOperands;
+
+    /// Byte operands must come back unchanged, including both extremes of
+    /// the `u8` range.
+    #[test]
+    fn byte_operands() {
+        let values = [5, 2, 85, 92, 26, 42, u8::MIN, u8::MAX];
+        let expected: Vec<i32> = values.iter().map(|&value| i32::from(value)).collect();
+        let decoded: Vec<i32> = MockInlineOperands::from_bytes(&values)
+            .operands()
+            .values()
+            .collect();
+        assert_eq!(decoded, expected);
+    }
+
+    /// Word operands must keep their sign, including both extremes of the
+    /// `i16` range.
+    #[test]
+    fn word_operands() {
+        let values = [-5, 2, 2845, 92, -26, 42, i16::MIN, i16::MAX];
+        let expected: Vec<i32> = values.iter().map(|&value| i32::from(value)).collect();
+        let decoded: Vec<i32> = MockInlineOperands::from_words(&values)
+            .operands()
+            .values()
+            .collect();
+        assert_eq!(decoded, expected);
+    }
+}