From 9839450ae00a08388697a8b7256f85b1d9d7180a Mon Sep 17 00:00:00 2001 From: Chad Brokaw Date: Wed, 7 Feb 2024 04:08:39 -0500 Subject: [PATCH] [read-fonts] tt bytecode decoder Adds an opcode definition, instruction representation and decoder for TrueType bytecode. Also renames Args -> InlineOperands and moves the type from skrifa to read-fonts. --- read-fonts/src/tables.rs | 1 + read-fonts/src/tables/truetype.rs | 3 + read-fonts/src/tables/truetype/bytecode.rs | 14 + .../src/tables/truetype/bytecode/decode.rs | 237 +++++ .../tables/truetype/bytecode/instruction.rs | 124 +++ .../src/tables/truetype/bytecode/opcode.rs | 842 ++++++++++++++++++ .../src/outline/glyf/hint/code_state/args.rs | 99 -- .../src/outline/glyf/hint/code_state/mod.rs | 7 - skrifa/src/outline/glyf/hint/engine/stack.rs | 23 +- skrifa/src/outline/glyf/hint/error.rs | 8 + skrifa/src/outline/glyf/hint/value_stack.rs | 17 +- 11 files changed, 1252 insertions(+), 123 deletions(-) create mode 100644 read-fonts/src/tables/truetype.rs create mode 100644 read-fonts/src/tables/truetype/bytecode.rs create mode 100644 read-fonts/src/tables/truetype/bytecode/decode.rs create mode 100644 read-fonts/src/tables/truetype/bytecode/instruction.rs create mode 100644 read-fonts/src/tables/truetype/bytecode/opcode.rs delete mode 100644 skrifa/src/outline/glyf/hint/code_state/args.rs diff --git a/read-fonts/src/tables.rs b/read-fonts/src/tables.rs index 93e29bb7a..d5a1c0cc6 100644 --- a/read-fonts/src/tables.rs +++ b/read-fonts/src/tables.rs @@ -32,6 +32,7 @@ pub mod post; pub mod postscript; pub mod sbix; pub mod stat; +pub mod truetype; pub mod variations; pub mod vhea; pub mod vmtx; diff --git a/read-fonts/src/tables/truetype.rs b/read-fonts/src/tables/truetype.rs new file mode 100644 index 000000000..a684572f4 --- /dev/null +++ b/read-fonts/src/tables/truetype.rs @@ -0,0 +1,3 @@ +//! TrueType (glyf) common code. 
+
+pub mod bytecode;
diff --git a/read-fonts/src/tables/truetype/bytecode.rs b/read-fonts/src/tables/truetype/bytecode.rs
new file mode 100644
index 000000000..5ba665275
--- /dev/null
+++ b/read-fonts/src/tables/truetype/bytecode.rs
@@ -0,0 +1,14 @@
+//! TrueType hinting bytecode.
+
+mod decode;
+mod instruction;
+mod opcode;
+
+pub use decode::{decode_all, DecodeError, Decoder};
+pub use instruction::{InlineOperands, Instruction};
+pub use opcode::Opcode;
+
+// Exported publicly for use by skrifa when the scaler_test feature is
+// enabled.
+#[cfg(any(test, feature = "scaler_test"))]
+pub use instruction::MockInlineOperands;
diff --git a/read-fonts/src/tables/truetype/bytecode/decode.rs b/read-fonts/src/tables/truetype/bytecode/decode.rs
new file mode 100644
index 000000000..5146930ef
--- /dev/null
+++ b/read-fonts/src/tables/truetype/bytecode/decode.rs
@@ -0,0 +1,247 @@
+//! TrueType bytecode decoder.
+
+use super::{InlineOperands, Instruction, Opcode};
+
+/// An error returned by [`Decoder::decode`] if the end of the bytecode
+/// stream is reached unexpectedly.
+#[derive(Copy, Clone, Debug)]
+pub struct DecodeError(());
+
+impl std::fmt::Display for DecodeError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("unexpected end of bytecode")
+    }
+}
+
+/// Decodes instructions from TrueType bytecode.
+#[derive(Copy, Clone)]
+pub struct Decoder<'a> {
+    /// The bytecode for the program.
+    pub bytecode: &'a [u8],
+    /// The "program counter" or current offset into the bytecode.
+    pub pc: usize,
+}
+
+impl<'a> Decoder<'a> {
+    /// Creates a new decoder for the given bytecode and program counter.
+    pub fn new(bytecode: &'a [u8], pc: usize) -> Self {
+        Self { bytecode, pc }
+    }
+
+    /// Decodes the next instruction.
+    ///
+    /// Returns `None` at the end of the bytecode stream.
+    ///
+    /// Returns an error if the instruction at the current program counter
+    /// is truncated. The program counter is not advanced in that case, so
+    /// calling this method again yields the same error.
+    pub fn decode(&mut self) -> Option<Result<Instruction<'a>, DecodeError>> {
+        let opcode = Opcode::from_byte(*self.bytecode.get(self.pc)?);
+        Some(self.decode_inner(opcode))
+    }
+
+    /// Decodes the instruction that starts with `opcode` at the current
+    /// program counter and advances past it on success.
+    fn decode_inner(&mut self, opcode: Opcode) -> Result<Instruction<'a>, DecodeError> {
+        let mut opcode_len = opcode.len();
+        let mut count_len = 0;
+        // A negative length marks NPUSHB/NPUSHW: the next byte holds the
+        // operand count and |opcode_len| is the size of each operand. The
+        // extra 2 bytes account for the opcode and the count byte itself.
+        if opcode_len < 0 {
+            let inline_count = *self.bytecode.get(self.pc + 1).ok_or(DecodeError(()))?;
+            opcode_len = -opcode_len * inline_count as i32 + 2;
+            count_len = 1;
+        }
+        let opcode_len = opcode_len as usize;
+        let pc = self.pc;
+        let next_pc = pc + opcode_len;
+        // Skip opcode and potential inline operand count byte.
+        let inline_start = pc + 1 + count_len;
+        let inline_size = next_pc - inline_start;
+        let mut inline_operands = InlineOperands::default();
+        if inline_size > 0 {
+            inline_operands.bytes = self
+                .bytecode
+                .get(inline_start..inline_start + inline_size)
+                .ok_or(DecodeError(()))?;
+            inline_operands.is_words = opcode.is_push_words();
+        }
+        self.pc += opcode_len;
+        Ok(Instruction {
+            opcode,
+            inline_operands,
+            pc,
+        })
+    }
+}
+
+/// Returns an iterator that yields all instructions in the given bytecode
+/// starting at the specified program counter.
+///
+/// The iterator does not terminate after a decoding error: the decoder
+/// does not advance past a truncated instruction, so the same error is
+/// yielded again on every subsequent call. Stop at the first `Err`.
+pub fn decode_all(
+    bytecode: &[u8],
+    pc: usize,
+) -> impl Iterator<Item = Result<Instruction<'_>, DecodeError>> + '_ + Clone {
+    let mut decoder = Decoder::new(bytecode, pc);
+    std::iter::from_fn(move || decoder.decode())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Opcode;
+
+    #[test]
+    fn mixed_ops() {
+        let mut enc = Encoder::default();
+        // intermix push and non-push ops of various sizes to test boundary
+        // conditions
+        let cases: &[(Opcode, &[i16])] = &[
+            (Opcode::PUSHB100, &[1, 2, 3, 255, 5]),
+            (Opcode::PUSHW010, &[-1, 4508, -3]),
+            (Opcode::IUP0, &[]),
+            (Opcode::NPUSHB, &[55; 255]),
+            (Opcode::MDRP00110, &[]),
+            (Opcode::NPUSHW, &[i16::MIN; 32]),
+            (Opcode::LOOPCALL, &[]),
+            (Opcode::FLIPOFF, &[]),
+            (
+                Opcode::PUSHW011,
+                &[i16::MIN, i16::MIN / 2, i16::MAX, i16::MAX / 2],
+            ),
+            (Opcode::GETVARIATION, &[]),
+        ];
+        for (opcode, values) in cases {
+            if !values.is_empty() {
+                enc.encode_push(values);
+            } else {
+                enc.encode(*opcode);
+            }
+        }
+        let all_ins = super::decode_all(&enc.0, 0)
+            .map(|ins| ins.unwrap())
+            .collect::<Vec<_>>();
+        for (ins, (expected_opcode, expected_values)) in all_ins.iter().zip(cases) {
+            assert_eq!(ins.opcode, *expected_opcode);
+            let values = ins
+                .inline_operands
+                .values()
+                .map(|v| v as i16)
+                .collect::<Vec<_>>();
+            assert_eq!(&values, expected_values);
+        }
+    }
+
+    #[test]
+    fn non_push_ops() {
+        // test decoding of all single byte (non-push) opcodes
+        let non_push_ops: Vec<_> = (0..=255)
+            .filter(|b| !Opcode::from_byte(*b).is_push())
+            .collect();
+        let decoded: Vec<_> = super::decode_all(&non_push_ops, 0)
+            .map(|ins| ins.unwrap().opcode as u8)
+            .collect();
+        assert_eq!(non_push_ops, decoded);
+    }
+
+    #[test]
+    fn real_bytecode() {
+        // taken from NotoSerif-Regular, glyph Rturnedsmall, gid 1272
+        let bytecode = [
+            181, 5, 1, 9, 3, 1, 76, 75, 176, 45, 80, 88, 64, 35, 0, 3, 0, 9, 7, 3, 9, 105, 6, 4, 2,
+            1, 1, 2, 97, 5, 1, 2, 2, 109, 77, 11, 8, 2, 7, 7, 0, 95, 10, 1, 0, 0, 107, 0, 78, 27,
+            64, 41, 0, 7, 8, 0, 8, 7, 114, 0, 3, 0, 9, 8, 3, 9, 105, 6, 4, 2, 1, 1, 2, 97, 5, 1, 2,
+            2, 109, 77, 11, 1, 8, 8, 0, 95, 10, 1, 0, 0, 107, 0, 78, 89, 64, 31, 37, 36, 1, 0, 40,
+            38, 36, 44, 37, 44, 34, 32, 27, 25, 24, 23, 22, 20, 17, 16, 12, 10, 9, 8, 0, 35, 1, 35,
+            12, 13, 22, 43,
+        ];
+        // comments below contain the ttx assembly
+        let expected = [
+            // PUSHB[ ] /* 6 values pushed */
+            // 5 1 9 3 1 76
+            "PUSHB[5] 5 1 9 3 1 76",
+            // MPPEM[ ] /* MeasurePixelPerEm */
+            "MPPEM",
+            // PUSHB[ ] /* 1 value pushed */
+            // 45
+            "PUSHB[0] 45",
+            // LT[ ] /* LessThan */
+            "LT",
+            // IF[ ] /* If */
+            "IF",
+            // NPUSHB[ ] /* 35 values pushed */
+            // 0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7
+            // 7 0 95 10 1 0 0 107 0 78
+            "NPUSHB 0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7 7 0 95 10 1 0 0 107 0 78",
+            // ELSE[ ] /* Else */
+            "ELSE",
+            // NPUSHB[ ] /* 41 values pushed */
+            // 0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2
+            // 2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78
+            "NPUSHB 0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78",
+            // EIF[ ] /* EndIf */
+            "EIF",
+            // NPUSHB[ ] /* 31 values pushed */
+            // 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0
+            // 35 1 35 12 13 22
+            "NPUSHB 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0 35 1 35 12 13 22",
+            // CALL[ ] /* CallFunction */
+            "CALL",
+        ];
+        let decoded: Vec<_> = super::decode_all(&bytecode, 0)
+            .map(|ins| ins.unwrap())
+            .collect();
+        let decoded_asm: Vec<_> = decoded.iter().map(|ins| ins.to_string()).collect();
+        assert_eq!(decoded_asm, expected);
+    }
+
+    /// Simple encoder used for testing.
+    #[derive(Default)]
+    struct Encoder(Vec<u8>);
+
+    impl Encoder {
+        pub fn encode(&mut self, opcode: Opcode) {
+            assert!(!opcode.is_push(), "use the encode_push method instead");
+            self.0.push(opcode as u8);
+        }
+
+        pub fn encode_push(&mut self, values: &[i16]) {
+            if values.is_empty() {
+                return;
+            }
+            let is_bytes = values.iter().all(|&x| x >= 0 && x <= u8::MAX as _);
+            if values.len() < 256 {
+                if is_bytes {
+                    if values.len() <= 8 {
+                        let opcode =
+                            Opcode::from_byte(Opcode::PUSHB000 as u8 + values.len() as u8 - 1);
+                        self.0.push(opcode as u8);
+                    } else {
+                        self.0.push(Opcode::NPUSHB as _);
+                        self.0.push(values.len() as _);
+                    }
+                    self.0.extend(values.iter().map(|&x| x as u8));
+                } else {
+                    if values.len() <= 8 {
+                        let opcode =
+                            Opcode::from_byte(Opcode::PUSHW000 as u8 + values.len() as u8 - 1);
+                        self.0.push(opcode as u8);
+                    } else {
+                        self.0.push(Opcode::NPUSHW as _);
+                        self.0.push(values.len() as _)
+                    }
+                    for &value in values {
+                        let value = value as u16;
+                        self.0.push((value >> 8) as _);
+                        self.0.push((value & 0xFF) as _);
+                    }
+                }
+            } else {
+                panic!("too many values to push in a single instruction");
+            }
+        }
+    }
+}
diff --git a/read-fonts/src/tables/truetype/bytecode/instruction.rs b/read-fonts/src/tables/truetype/bytecode/instruction.rs
new file mode 100644
index 000000000..94fc155a6
--- /dev/null
+++ b/read-fonts/src/tables/truetype/bytecode/instruction.rs
@@ -0,0 +1,127 @@
+//! Decoded representation of a TrueType instruction.
+
+use super::Opcode;
+
+/// Decoded TrueType instruction.
+#[derive(Copy, Clone, Debug)]
+pub struct Instruction<'a> {
+    /// Operation code.
+    pub opcode: Opcode,
+    /// Instruction operands that were decoded from the bytecode.
+    pub inline_operands: InlineOperands<'a>,
+    /// Program counter -- offset into the bytecode where this
+    /// instruction was decoded.
+    pub pc: usize,
+}
+
+impl core::fmt::Display for Instruction<'_> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{}", self.opcode.name())?;
+        for value in self.inline_operands.values() {
+            write!(f, " {value}")?;
+        }
+        Ok(())
+    }
+}
+
+/// Sequence of instruction operands that are encoded directly in the bytecode.
+///
+/// This is only used for push instructions.
+#[derive(Copy, Clone, Default, Debug)]
+pub struct InlineOperands<'a> {
+    /// Raw operand bytes as they appear in the bytecode.
+    pub(super) bytes: &'a [u8],
+    /// True if the operands are 16-bit big-endian words rather than bytes.
+    pub(super) is_words: bool,
+}
+
+impl<'a> InlineOperands<'a> {
+    /// Returns the number of operands.
+    #[inline]
+    pub fn len(&self) -> usize {
+        if self.is_words {
+            self.bytes.len() / 2
+        } else {
+            self.bytes.len()
+        }
+    }
+
+    /// Returns true if there are no operands, i.e. `len()` is zero.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns an iterator over the operand values.
+    #[inline]
+    pub fn values(&self) -> impl Iterator<Item = i32> + 'a + Clone {
+        let (bytes, words) = if self.is_words {
+            (&[][..], self.bytes)
+        } else {
+            (self.bytes, &[][..])
+        };
+        bytes
+            .iter()
+            .map(|byte| i32::from(*byte))
+            .chain(words.chunks_exact(2).map(|chunk| {
+                // Words are stored big-endian; reading them as i16 first
+                // sign extends the value when widening to i32.
+                i32::from(i16::from_be_bytes([chunk[0], chunk[1]]))
+            }))
+    }
+}
+
+/// Mock for testing inline operands.
+#[cfg(any(test, feature = "scaler_test"))]
+pub struct MockInlineOperands {
+    bytes: Vec<u8>,
+    is_words: bool,
+}
+
+#[cfg(any(test, feature = "scaler_test"))]
+impl MockInlineOperands {
+    pub fn from_bytes(bytes: &[u8]) -> Self {
+        Self {
+            bytes: bytes.into(),
+            is_words: false,
+        }
+    }
+
+    pub fn from_words(words: &[i16]) -> Self {
+        Self {
+            bytes: words
+                .iter()
+                // Big-endian, matching how the decoder reads word operands.
+                .flat_map(|word| word.to_be_bytes())
+                .collect(),
+            is_words: true,
+        }
+    }
+
+    pub fn operands(&self) -> InlineOperands<'_> {
+        InlineOperands {
+            bytes: &self.bytes,
+            is_words: self.is_words,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::MockInlineOperands;
+
+    #[test]
+    fn byte_operands() {
+        let values = [5, 2, 85, 92, 26, 42, u8::MIN, u8::MAX];
+        let mock = MockInlineOperands::from_bytes(&values);
+        let decoded = mock.operands().values().collect::<Vec<_>>();
+        assert!(values.iter().map(|x| *x as i32).eq(decoded.iter().copied()));
+    }
+
+    #[test]
+    fn word_operands() {
+        let values = [-5, 2, 2845, 92, -26, 42, i16::MIN, i16::MAX];
+        let mock = MockInlineOperands::from_words(&values);
+        let decoded = mock.operands().values().collect::<Vec<_>>();
+        assert!(values.iter().map(|x| *x as i32).eq(decoded.iter().copied()));
+    }
+}
diff --git a/read-fonts/src/tables/truetype/bytecode/opcode.rs b/read-fonts/src/tables/truetype/bytecode/opcode.rs
new file mode 100644
index 000000000..c43eaab77
--- /dev/null
+++ b/read-fonts/src/tables/truetype/bytecode/opcode.rs
@@ -0,0 +1,842 @@
+//! TrueType hinting opcodes.
+
+/// Operation code for a TrueType instruction.
+///
+/// See [the TrueType instruction set](https://learn.microsoft.com/en-us/typography/opentype/spec/tt_instructions)
+/// from the OpenType specification for more detail.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
+#[repr(u8)]
+pub enum Opcode {
+    SVTCA0 = 0x00,
+    SVTCA1 = 0x01,
+    SPVTCA0 = 0x02,
+    SPVTCA1 = 0x03,
+    SFVTCA0 = 0x04,
+    SFVTCA1 = 0x05,
+    SPVTL0 = 0x06,
+    SPVTL1 = 0x07,
+    SFVTL0 = 0x08,
+    SFVTL1 = 0x09,
+    SPVFS = 0x0A,
+    SFVFS = 0x0B,
+    GPV = 0x0C,
+    GFV = 0x0D,
+    SFVTPV = 0x0E,
+    ISECT = 0x0F,
+    SRP0 = 0x10,
+    SRP1 = 0x11,
+    SRP2 = 0x12,
+    SZP0 = 0x13,
+    SZP1 = 0x14,
+    SZP2 = 0x15,
+    SZPS = 0x16,
+    SLOOP = 0x17,
+    RTG = 0x18,
+    RTHG = 0x19,
+    SMD = 0x1A,
+    ELSE = 0x1B,
+    JMPR = 0x1C,
+    SCVTCI = 0x1D,
+    SSWCI = 0x1E,
+    SSW = 0x1F,
+    DUP = 0x20,
+    POP = 0x21,
+    CLEAR = 0x22,
+    SWAP = 0x23,
+    DEPTH = 0x24,
+    CINDEX = 0x25,
+    MINDEX = 0x26,
+    ALIGNPTS = 0x27,
+    INS28 = 0x28,
+    UTP = 0x29,
+    LOOPCALL = 0x2A,
+    CALL = 0x2B,
+    FDEF = 0x2C,
+    ENDF = 0x2D,
+    MDAP0 = 0x2E,
+    MDAP1 = 0x2F,
+    IUP0 = 0x30,
+    IUP1 = 0x31,
+    SHP0 = 0x32,
+    SHP1 = 0x33,
+    SHC0 = 0x34,
+    SHC1 = 0x35,
+    SHZ0 = 0x36,
+    SHZ1 = 0x37,
+    SHPIX = 0x38,
+    IP = 0x39,
+    MSIRP0 = 0x3A,
+    MSIRP1 = 0x3B,
+    ALIGNRP = 0x3C,
+    RTDG = 0x3D,
+    MIAP0 = 0x3E,
+    MIAP1 = 0x3F,
+    NPUSHB = 0x40,
+    NPUSHW = 0x41,
+    WS = 0x42,
+    RS = 0x43,
+    WCVTP = 0x44,
+    RCVT = 0x45,
+    GC0 = 0x46,
+    GC1 = 0x47,
+    SCFS = 0x48,
+    MD0 = 0x49,
+    MD1 = 0x4A,
+    MPPEM = 0x4B,
+    MPS = 0x4C,
+    FLIPON = 0x4D,
+    FLIPOFF = 0x4E,
+    DEBUG = 0x4F,
+    LT = 0x50,
+    LTEQ = 0x51,
+    GT = 0x52,
+    GTEQ = 0x53,
+    EQ = 0x54,
+    NEQ = 0x55,
+    ODD = 0x56,
+    EVEN = 0x57,
+    IF = 0x58,
+    EIF = 0x59,
+    AND = 0x5A,
+    OR = 0x5B,
+    NOT = 0x5C,
+    DELTAP1 = 0x5D,
+    SDB = 0x5E,
+    SDS = 0x5F,
+    ADD = 0x60,
+    SUB = 0x61,
+    DIV = 0x62,
+    MUL = 0x63,
+    ABS = 0x64,
+    NEG = 0x65,
+    FLOOR = 0x66,
+    CEILING = 0x67,
+    ROUND00 = 0x68,
+    ROUND01 = 0x69,
+    ROUND10 = 0x6A,
+    ROUND11 = 0x6B,
+    NROUND00 = 0x6C,
+    NROUND01 = 0x6D,
+    NROUND10 = 0x6E,
+    NROUND11 = 0x6F,
+    WCVTF = 0x70,
+    DELTAP2 = 0x71,
+    DELTAP3 = 0x72,
+    DELTAC1 = 0x73,
+    DELTAC2 = 0x74,
+    DELTAC3 = 0x75,
+    SROUND = 0x76,
+    S45ROUND = 0x77,
+    JROT = 0x78,
+    JROF = 0x79,
+    ROFF = 0x7A,
+    INS7B = 0x7B,
+    RUTG = 0x7C,
+    RDTG = 0x7D,
+    SANGW = 0x7E,
+    AA = 0x7F,
+    FLIPPT = 0x80,
+    FLIPRGON = 0x81,
+    FLIPRGOFF = 0x82,
+    INS83 = 0x83,
+    INS84 = 0x84,
+    SCANCTRL = 0x85,
+    SDPVTL0 = 0x86,
+    SDPVTL1 = 0x87,
+    GETINFO = 0x88,
+    IDEF = 0x89,
+    ROLL = 0x8A,
+    MAX = 0x8B,
+    MIN = 0x8C,
+    SCANTYPE = 0x8D,
+    INSTCTRL = 0x8E,
+    INS8F = 0x8F,
+    INS90 = 0x90,
+    GETVARIATION = 0x91, // 0x91 per the OpenType spec; agrees with OPCODE_NAMES
+    GETDATA = 0x92, // 0x92, as in FreeType; agrees with OPCODE_NAMES
+    INS93 = 0x93,
+    INS94 = 0x94,
+    INS95 = 0x95,
+    INS96 = 0x96,
+    INS97 = 0x97,
+    INS98 = 0x98,
+    INS99 = 0x99,
+    INS9A = 0x9A,
+    INS9B = 0x9B,
+    INS9C = 0x9C,
+    INS9D = 0x9D,
+    INS9E = 0x9E,
+    INS9F = 0x9F,
+    INSA0 = 0xA0,
+    INSA1 = 0xA1,
+    INSA2 = 0xA2,
+    INSA3 = 0xA3,
+    INSA4 = 0xA4,
+    INSA5 = 0xA5,
+    INSA6 = 0xA6,
+    INSA7 = 0xA7,
+    INSA8 = 0xA8,
+    INSA9 = 0xA9,
+    INSAA = 0xAA,
+    INSAB = 0xAB,
+    INSAC = 0xAC,
+    INSAD = 0xAD,
+    INSAE = 0xAE,
+    INSAF = 0xAF,
+    PUSHB000 = 0xB0,
+    PUSHB001 = 0xB1,
+    PUSHB010 = 0xB2,
+    PUSHB011 = 0xB3,
+    PUSHB100 = 0xB4,
+    PUSHB101 = 0xB5,
+    PUSHB110 = 0xB6,
+    PUSHB111 = 0xB7,
+    PUSHW000 = 0xB8,
+    PUSHW001 = 0xB9,
+    PUSHW010 = 0xBA,
+    PUSHW011 = 0xBB,
+    PUSHW100 = 0xBC,
+    PUSHW101 = 0xBD,
+    PUSHW110 = 0xBE,
+    PUSHW111 = 0xBF,
+    MDRP00000 = 0xC0,
+    MDRP00001 = 0xC1,
+    MDRP00010 = 0xC2,
+    MDRP00011 = 0xC3,
+    MDRP00100 = 0xC4,
+    MDRP00101 = 0xC5,
+    MDRP00110 = 0xC6,
+    MDRP00111 = 0xC7,
+    MDRP01000 = 0xC8,
+    MDRP01001 = 0xC9,
+    MDRP01010 = 0xCA,
+    MDRP01011 = 0xCB,
+    MDRP01100 = 0xCC,
+    MDRP01101 = 0xCD,
+    MDRP01110 = 0xCE,
+    MDRP01111 = 0xCF,
+    MDRP10000 = 0xD0,
+    MDRP10001 = 0xD1,
+    MDRP10010 = 0xD2,
+    MDRP10011 = 0xD3,
+    MDRP10100 = 0xD4,
+    MDRP10101 = 0xD5,
+    MDRP10110 = 0xD6,
+    MDRP10111 = 0xD7,
+    MDRP11000 = 0xD8,
+    MDRP11001 = 0xD9,
+    MDRP11010 = 0xDA,
+    MDRP11011 = 0xDB,
+    MDRP11100 = 0xDC,
+    MDRP11101 = 0xDD,
+    MDRP11110 = 0xDE,
+    MDRP11111 = 0xDF,
+    MIRP00000 = 0xE0,
+    MIRP00001 = 0xE1,
+    MIRP00010 = 0xE2,
+    MIRP00011 = 0xE3,
+    MIRP00100 = 0xE4,
+    MIRP00101 = 0xE5,
+    MIRP00110 = 0xE6,
+    MIRP00111 = 0xE7,
+    MIRP01000 = 0xE8,
+    MIRP01001 = 0xE9,
+    MIRP01010 = 0xEA,
+    MIRP01011 = 0xEB,
+    MIRP01100 = 0xEC,
+    MIRP01101 = 0xED,
+    MIRP01110 = 0xEE,
+    MIRP01111 = 0xEF,
+    MIRP10000 = 0xF0,
+    MIRP10001 = 0xF1,
+    MIRP10010 = 0xF2,
+    MIRP10011 = 0xF3,
+    MIRP10100 = 0xF4,
+    MIRP10101 = 0xF5,
+    MIRP10110 = 0xF6,
+    MIRP10111 = 0xF7,
+    MIRP11000 = 0xF8,
+    MIRP11001 = 0xF9,
+    MIRP11010 = 0xFA,
+    MIRP11011 = 0xFB,
+    MIRP11100 = 0xFC,
+    MIRP11101 = 0xFD,
+    MIRP11110 = 0xFE,
+    MIRP11111 = 0xFF,
+}
+
+impl Opcode {
+    /// Creates an opcode from the given byte.
+    ///
+    /// There is a 1:1 mapping between bytes and opcodes.
+    #[inline]
+    pub fn from_byte(byte: u8) -> Self {
+        OPCODE_FROM_BYTE[byte as usize]
+    }
+
+    /// Returns a more descriptive name for the opcode.
+    pub fn name(self) -> &'static str {
+        OPCODE_NAMES[self as usize]
+    }
+
+    /// Returns true if this is an instruction that pushes values onto the
+    /// stack.
+    #[inline]
+    pub fn is_push(self) -> bool {
+        (self >= Self::PUSHB000 && self <= Self::PUSHW111)
+            || self == Self::NPUSHB
+            || self == Self::NPUSHW
+    }
+
+    pub(super) fn is_push_words(self) -> bool {
+        (self >= Self::PUSHW000 && self <= Self::PUSHW111) || self == Self::NPUSHW
+    }
+
+    pub(super) fn len(self) -> i32 {
+        OPCODE_LENGTHS[self as usize] as i32
+    }
+}
+
+use Opcode::*;
+
+const OPCODE_FROM_BYTE: [Opcode; 256] = [
+    SVTCA0,
+    SVTCA1,
+    SPVTCA0,
+    SPVTCA1,
+    SFVTCA0,
+    SFVTCA1,
+    SPVTL0,
+    SPVTL1,
+    SFVTL0,
+    SFVTL1,
+    SPVFS,
+    SFVFS,
+    GPV,
+    GFV,
+    SFVTPV,
+    ISECT,
+    SRP0,
+    SRP1,
+    SRP2,
+    SZP0,
+    SZP1,
+    SZP2,
+    SZPS,
+    SLOOP,
+    RTG,
+    RTHG,
+    SMD,
+    ELSE,
+    JMPR,
+    SCVTCI,
+    SSWCI,
+    SSW,
+    DUP,
+    POP,
+    CLEAR,
+    SWAP,
+    DEPTH,
+    CINDEX,
+    MINDEX,
+    ALIGNPTS,
+    INS28,
+    UTP,
+    LOOPCALL,
+    CALL,
+    FDEF,
+    ENDF,
+    MDAP0,
+    MDAP1,
+    IUP0,
+    IUP1,
+    SHP0,
+    SHP1,
+    SHC0,
+    SHC1,
+    SHZ0,
+    SHZ1,
+    SHPIX,
+    IP,
+    MSIRP0,
+    MSIRP1,
+    ALIGNRP,
+    RTDG,
+    MIAP0,
+    MIAP1,
+    NPUSHB,
+    NPUSHW,
+    WS,
+    RS,
+    WCVTP,
+    RCVT,
+    GC0,
+    GC1,
+    SCFS,
+    MD0,
+    MD1,
+    MPPEM,
+    MPS,
+    FLIPON,
+    FLIPOFF,
+    DEBUG,
+    LT,
+    LTEQ,
+    GT,
+    GTEQ,
+    EQ,
+    NEQ,
+    ODD,
+    EVEN,
+    IF,
+    EIF,
+    AND,
+    OR,
+    NOT,
+    DELTAP1,
+    SDB,
+    SDS,
+    ADD,
+    SUB,
+    DIV,
+    MUL,
+    ABS,
+    NEG,
+    FLOOR,
+    CEILING,
+    ROUND00,
+    ROUND01,
+    ROUND10,
+    ROUND11,
+    NROUND00,
+    NROUND01,
+    NROUND10,
+    NROUND11,
+    WCVTF,
+    DELTAP2,
+    DELTAP3,
+    DELTAC1,
+    DELTAC2,
+    DELTAC3,
+    SROUND,
+    S45ROUND,
+    JROT,
+    JROF,
+    ROFF,
+    INS7B,
+    RUTG,
+    RDTG,
+    SANGW,
+    AA,
+    FLIPPT,
+    FLIPRGON,
+    FLIPRGOFF,
+    INS83,
+    INS84,
+    SCANCTRL,
+    SDPVTL0,
+    SDPVTL1,
+    GETINFO,
+    IDEF,
+    ROLL,
+    MAX,
+    MIN,
+    SCANTYPE,
+    INSTCTRL,
+    INS8F,
+    INS90,
+    GETVARIATION,
+    GETDATA,
+    INS93,
+    INS94,
+    INS95,
+    INS96,
+    INS97,
+    INS98,
+    INS99,
+    INS9A,
+    INS9B,
+    INS9C,
+    INS9D,
+    INS9E,
+    INS9F,
+    INSA0,
+    INSA1,
+    INSA2,
+    INSA3,
+    INSA4,
+    INSA5,
+    INSA6,
+    INSA7,
+    INSA8,
+    INSA9,
+    INSAA,
+    INSAB,
+    INSAC,
+    INSAD,
+    INSAE,
+    INSAF,
+    PUSHB000,
+    PUSHB001,
+    PUSHB010,
+    PUSHB011,
+    PUSHB100,
+    PUSHB101,
+    PUSHB110,
+    PUSHB111,
+    PUSHW000,
+    PUSHW001,
+    PUSHW010,
+    PUSHW011,
+    PUSHW100,
+    PUSHW101,
+    PUSHW110,
+    PUSHW111,
+    MDRP00000,
+    MDRP00001,
+    MDRP00010,
+    MDRP00011,
+    MDRP00100,
+    MDRP00101,
+    MDRP00110,
+    MDRP00111,
+    MDRP01000,
+    MDRP01001,
+    MDRP01010,
+    MDRP01011,
+    MDRP01100,
+    MDRP01101,
+    MDRP01110,
+    MDRP01111,
+    MDRP10000,
+    MDRP10001,
+    MDRP10010,
+    MDRP10011,
+    MDRP10100,
+    MDRP10101,
+    MDRP10110,
+    MDRP10111,
+    MDRP11000,
+    MDRP11001,
+    MDRP11010,
+    MDRP11011,
+    MDRP11100,
+    MDRP11101,
+    MDRP11110,
+    MDRP11111,
+    MIRP00000,
+    MIRP00001,
+    MIRP00010,
+    MIRP00011,
+    MIRP00100,
+    MIRP00101,
+    MIRP00110,
+    MIRP00111,
+    MIRP01000,
+    MIRP01001,
+    MIRP01010,
+    MIRP01011,
+    MIRP01100,
+    MIRP01101,
+    MIRP01110,
+    MIRP01111,
+    MIRP10000,
+    MIRP10001,
+    MIRP10010,
+    MIRP10011,
+    MIRP10100,
+    MIRP10101,
+    MIRP10110,
+    MIRP10111,
+    MIRP11000,
+    MIRP11001,
+    MIRP11010,
+    MIRP11011,
+    MIRP11100,
+    MIRP11101,
+    MIRP11110,
+    MIRP11111,
+];
+
+/// There doesn't seem to be any
prevailing set of mnemonics for these +/// instructions. These are pulled from FreeType with the justification +/// that diffing FreeType hinting traces with our own is the most +/// efficient way to track down discrepancies. +/// +const OPCODE_NAMES: [&str; 256] = [ + "SVTCA[y]", + "SVTCA[x]", + "SPVTCA[y]", + "SPVTCA[x]", + "SFVTCA[y]", + "SFVTCA[x]", + "SPVTL[||]", + "SPVTL[+]", + "SFVTL[||]", + "SFVTL[+]", + "SPVFS", + "SFVFS", + "GPV", + "GFV", + "SFVTPV", + "ISECT", + "SRP0", + "SRP1", + "SRP2", + "SZP0", + "SZP1", + "SZP2", + "SZPS", + "SLOOP", + "RTG", + "RTHG", + "SMD", + "ELSE", + "JMPR", + "SCVTCI", + "SSWCI", + "SSW", + "DUP", + "POP", + "CLEAR", + "SWAP", + "DEPTH", + "CINDEX", + "MINDEX", + "ALIGNPTS", + "INS_$28", + "UTP", + "LOOPCALL", + "CALL", + "FDEF", + "ENDF", + "MDAP[]", + "MDAP[rnd]", + "IUP[y]", + "IUP[x]", + "SHP[rp2]", + "SHP[rp1]", + "SHC[rp2]", + "SHC[rp1]", + "SHZ[rp2]", + "SHZ[rp1]", + "SHPIX", + "IP", + "MSIRP[]", + "MSIRP[rp0]", + "ALIGNRP", + "RTDG", + "MIAP[]", + "MIAP[rnd]", + "NPUSHB", + "NPUSHW", + "WS", + "RS", + "WCVTP", + "RCVT", + "GC[curr]", + "GC[orig]", + "SCFS", + "MD[curr]", + "MD[orig]", + "MPPEM", + "MPS", + "FLIPON", + "FLIPOFF", + "DEBUG", + "LT", + "LTEQ", + "GT", + "GTEQ", + "EQ", + "NEQ", + "ODD", + "EVEN", + "IF", + "EIF", + "AND", + "OR", + "NOT", + "DELTAP1", + "SDB", + "SDS", + "ADD", + "SUB", + "DIV", + "MUL", + "ABS", + "NEG", + "FLOOR", + "CEILING", + "ROUND[G]", + "ROUND[B]", + "ROUND[W]", + "ROUND[]", + "NROUND[G]", + "NROUND[B]", + "NROUND[W]", + "NROUND[]", + "WCVTF", + "DELTAP2", + "DELTAP3", + "DELTAC1", + "DELTAC2", + "DELTAC3", + "SROUND", + "S45ROUND", + "JROT", + "JROF", + "ROFF", + "INS_$7B", + "RUTG", + "RDTG", + "SANGW", + "AA", + "FLIPPT", + "FLIPRGON", + "FLIPRGOFF", + "INS_$83", + "INS_$84", + "SCANCTRL", + "SDPVTL[||]", + "SDPVTL[+]", + "GETINFO", + "IDEF", + "ROLL", + "MAX", + "MIN", + "SCANTYPE", + "INSTCTRL", + "INS_$8F", + "INS_$90", + "GETVARIATION", + "GETDATA", + "INS_$93", + 
"INS_$94", + "INS_$95", + "INS_$96", + "INS_$97", + "INS_$98", + "INS_$99", + "INS_$9A", + "INS_$9B", + "INS_$9C", + "INS_$9D", + "INS_$9E", + "INS_$9F", + "INS_$A0", + "INS_$A1", + "INS_$A2", + "INS_$A3", + "INS_$A4", + "INS_$A5", + "INS_$A6", + "INS_$A7", + "INS_$A8", + "INS_$A9", + "INS_$AA", + "INS_$AB", + "INS_$AC", + "INS_$AD", + "INS_$AE", + "INS_$AF", + "PUSHB[0]", + "PUSHB[1]", + "PUSHB[2]", + "PUSHB[3]", + "PUSHB[4]", + "PUSHB[5]", + "PUSHB[6]", + "PUSHB[7]", + "PUSHW[0]", + "PUSHW[1]", + "PUSHW[2]", + "PUSHW[3]", + "PUSHW[4]", + "PUSHW[5]", + "PUSHW[6]", + "PUSHW[7]", + "MDRP[G]", + "MDRP[B]", + "MDRP[W]", + "MDRP[]", + "MDRP[rG]", + "MDRP[rB]", + "MDRP[rW]", + "MDRP[r]", + "MDRP[mG]", + "MDRP[mB]", + "MDRP[mW]", + "MDRP[m]", + "MDRP[mrG]", + "MDRP[mrB]", + "MDRP[mrW]", + "MDRP[mr]", + "MDRP[pG]", + "MDRP[pB]", + "MDRP[pW]", + "MDRP[p]", + "MDRP[prG]", + "MDRP[prB]", + "MDRP[prW]", + "MDRP[pr]", + "MDRP[pmG]", + "MDRP[pmB]", + "MDRP[pmW]", + "MDRP[pm]", + "MDRP[pmrG]", + "MDRP[pmrB]", + "MDRP[pmrW]", + "MDRP[pmr]", + "MIRP[G]", + "MIRP[B]", + "MIRP[W]", + "MIRP[]", + "MIRP[rG]", + "MIRP[rB]", + "MIRP[rW]", + "MIRP[r]", + "MIRP[mG]", + "MIRP[mB]", + "MIRP[mW]", + "MIRP[m]", + "MIRP[mrG]", + "MIRP[mrB]", + "MIRP[mrW]", + "MIRP[mr]", + "MIRP[pG]", + "MIRP[pB]", + "MIRP[pW]", + "MIRP[p]", + "MIRP[prG]", + "MIRP[prB]", + "MIRP[prW]", + "MIRP[pr]", + "MIRP[pmG]", + "MIRP[pmB]", + "MIRP[pmW]", + "MIRP[pm]", + "MIRP[pmrG]", + "MIRP[pmrB]", + "MIRP[pmrW]", + "MIRP[pmr]", +]; + +/// Size in bytes of an instruction. +/// +/// The negative values represent variable length instructions where the +/// next byte in the stream is the count of following operands and the +/// absolute value of the length in this table is the size in bytes of +/// each operand. These are just the NPUSHB and NPUSHW instructions. 
+/// +const OPCODE_LENGTHS: [i8; 256] = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 5, 7, 9, 11, 13, + 15, 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, +]; diff --git a/skrifa/src/outline/glyf/hint/code_state/args.rs b/skrifa/src/outline/glyf/hint/code_state/args.rs deleted file mode 100644 index cfa65e1e4..000000000 --- a/skrifa/src/outline/glyf/hint/code_state/args.rs +++ /dev/null @@ -1,99 +0,0 @@ -//! Inline instruction arguments. - -/// Support for decoding a sequence of bytes or words from the -/// instruction stream. -#[derive(Copy, Clone, Default, Debug)] -pub struct Args<'a> { - bytes: &'a [u8], - is_words: bool, -} - -impl<'a> Args<'a> { - pub(crate) fn new(bytes: &'a [u8], is_words: bool) -> Self { - Self { bytes, is_words } - } - - /// Returns the number of arguments in the list. - pub fn len(&self) -> usize { - if self.is_words { - self.bytes.len() / 2 - } else { - self.bytes.len() - } - } - - /// Returns true if the argument list is empty. - pub fn is_empty(&self) -> bool { - self.bytes.is_empty() - } - - /// Returns an iterator over the argument values. 
- pub fn values(&self) -> impl Iterator + 'a + Clone { - let bytes = if self.is_words { &[] } else { self.bytes }; - let words = if self.is_words { self.bytes } else { &[] }; - bytes - .iter() - .map(|byte| *byte as u32 as i32) - .chain(words.chunks_exact(2).map(|chunk| { - let word = ((chunk[0] as u16) << 8) | chunk[1] as u16; - // Double cast to ensure sign extension - word as i16 as i32 - })) - } -} - -/// Mock for testing arguments. -#[cfg(test)] -pub(crate) struct MockArgs { - bytes: Vec, - is_words: bool, -} - -#[cfg(test)] -impl MockArgs { - pub fn from_bytes(bytes: &[u8]) -> Self { - Self { - bytes: bytes.into(), - is_words: false, - } - } - - pub fn from_words(words: &[i16]) -> Self { - Self { - bytes: words - .iter() - .map(|word| *word as u16) - .flat_map(|word| vec![(word >> 8) as u8, word as u8]) - .collect(), - is_words: true, - } - } - - pub fn args(&self) -> Args { - Args { - bytes: &self.bytes, - is_words: self.is_words, - } - } -} - -#[cfg(test)] -mod tests { - use super::MockArgs; - - #[test] - fn byte_args() { - let values = [5, 2, 85, 92, 26, 42, u8::MIN, u8::MAX]; - let mock = MockArgs::from_bytes(&values); - let decoded = mock.args().values().collect::>(); - assert!(values.iter().map(|x| *x as i32).eq(decoded.iter().copied())); - } - - #[test] - fn word_args() { - let values = [-5, 2, 2845, 92, -26, 42, i16::MIN, i16::MAX]; - let mock = MockArgs::from_words(&values); - let decoded = mock.args().values().collect::>(); - assert!(values.iter().map(|x| *x as i32).eq(decoded.iter().copied())); - } -} diff --git a/skrifa/src/outline/glyf/hint/code_state/mod.rs b/skrifa/src/outline/glyf/hint/code_state/mod.rs index 9b69444cb..7653172cc 100644 --- a/skrifa/src/outline/glyf/hint/code_state/mod.rs +++ b/skrifa/src/outline/glyf/hint/code_state/mod.rs @@ -1,9 +1,5 @@ //! State for managing active programs and decoding instructions. -mod args; - -pub use args::Args; - /// Describes the source for a piece of bytecode. 
#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)] #[repr(u8)] @@ -18,6 +14,3 @@ pub enum ProgramKind { /// Glyph specified program. Stored per-glyph in the `glyf` table. Glyph = 2, } - -#[cfg(test)] -pub(crate) use args::MockArgs; diff --git a/skrifa/src/outline/glyf/hint/engine/stack.rs b/skrifa/src/outline/glyf/hint/engine/stack.rs index 6b148cbce..f8981067d 100644 --- a/skrifa/src/outline/glyf/hint/engine/stack.rs +++ b/skrifa/src/outline/glyf/hint/engine/stack.rs @@ -5,7 +5,9 @@ //! See //! and -use super::{super::code_state::Args, Engine, OpResult}; +use read_fonts::tables::truetype::bytecode::InlineOperands; + +use super::{Engine, OpResult}; impl<'a> Engine<'a> { /// Duplicate top stack element. @@ -166,29 +168,30 @@ impl<'a> Engine<'a> { /// /// See /// and - pub(super) fn op_push(&mut self, args: &Args) -> OpResult { - self.value_stack.push_args(args) + pub(super) fn op_push(&mut self, operands: &InlineOperands) -> OpResult { + self.value_stack.push_inline_operands(operands) } } #[cfg(test)] mod tests { - use super::super::{super::code_state::MockArgs, MockEngine}; + use super::super::MockEngine; + use read_fonts::tables::truetype::bytecode::MockInlineOperands; #[test] fn stack_ops() { let mut mock = MockEngine::new(); let mut engine = mock.engine(); - let byte_args = MockArgs::from_bytes(&[2, 4, 6, 8]); - let word_args = MockArgs::from_words(&[-2000, 4000, -6000, 8000]); + let byte_args = MockInlineOperands::from_bytes(&[2, 4, 6, 8]); + let word_args = MockInlineOperands::from_words(&[-2000, 4000, -6000, 8000]); let initial_stack = byte_args - .args() + .operands() .values() - .chain(word_args.args().values()) + .chain(word_args.operands().values()) .collect::>(); // Push instructions - engine.op_push(&byte_args.args()).unwrap(); - engine.op_push(&word_args.args()).unwrap(); + engine.op_push(&byte_args.operands()).unwrap(); + engine.op_push(&word_args.operands()).unwrap(); assert_eq!(engine.value_stack.values(), initial_stack); // DEPTH[] 
engine.op_depth().unwrap(); diff --git a/skrifa/src/outline/glyf/hint/error.rs b/skrifa/src/outline/glyf/hint/error.rs index 89fa018a3..a5c392280 100644 --- a/skrifa/src/outline/glyf/hint/error.rs +++ b/skrifa/src/outline/glyf/hint/error.rs @@ -1,5 +1,7 @@ //! Hinting error definitions. +use read_fonts::tables::truetype::bytecode::DecodeError; + /// Errors that may occur when interpreting TrueType bytecode. #[derive(Clone, Debug)] pub enum HintErrorKind { @@ -71,3 +73,9 @@ impl core::fmt::Display for HintErrorKind { } } } + +impl From for HintErrorKind { + fn from(_: DecodeError) -> Self { + Self::UnexpectedEndOfBytecode + } +} diff --git a/skrifa/src/outline/glyf/hint/value_stack.rs b/skrifa/src/outline/glyf/hint/value_stack.rs index 317805290..0a5090397 100644 --- a/skrifa/src/outline/glyf/hint/value_stack.rs +++ b/skrifa/src/outline/glyf/hint/value_stack.rs @@ -1,6 +1,8 @@ //! Value stack for TrueType interpreter. +//! +use read_fonts::tables::truetype::bytecode::InlineOperands; -use super::{code_state::Args, error::HintErrorKind}; +use super::error::HintErrorKind; use HintErrorKind::{ValueStackOverflow, ValueStackUnderflow}; @@ -50,15 +52,15 @@ impl<'a> ValueStack<'a> { /// Implements the PUSHB[], PUSHW[], NPUSHB[] and NPUSHW[] instructions. /// /// See - pub fn push_args(&mut self, args: &Args) -> Result<(), HintErrorKind> { - let push_count = args.len(); + pub fn push_inline_operands(&mut self, operands: &InlineOperands) -> Result<(), HintErrorKind> { + let push_count = operands.len(); let stack_base = self.top; for (stack_value, value) in self .values .get_mut(stack_base..stack_base + push_count) .ok_or(ValueStackOverflow)? 
.iter_mut() - .zip(args.values()) + .zip(operands.values()) { *stack_value = value; } @@ -201,7 +203,8 @@ impl<'a> ValueStack<'a> { #[cfg(test)] mod tests { - use super::{super::code_state::MockArgs, HintErrorKind, ValueStack}; + use super::{HintErrorKind, ValueStack}; + use read_fonts::tables::truetype::bytecode::MockInlineOperands; // The following are macros because functions can't return a new ValueStack // with a borrowed parameter. @@ -239,8 +242,8 @@ mod tests { fn push_args() { let mut stack = make_empty_stack!(&mut [0; 32]); let values = [-5, 2, 2845, 92, -26, 42, i16::MIN, i16::MAX]; - let mock_args = MockArgs::from_words(&values); - stack.push_args(&mock_args.args()).unwrap(); + let mock_args = MockInlineOperands::from_words(&values); + stack.push_inline_operands(&mock_args.operands()).unwrap(); let mut popped = vec![]; while !stack.is_empty() { popped.push(stack.pop().unwrap());