-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds an opcode definition, instruction representation and decoder for TrueType bytecode. Also renames Args -> InlineOperands and moves the type from skrifa to read-fonts.
- Loading branch information
Showing
11 changed files
with
1,252 additions
and
123 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
//! TrueType (glyf) common code. | ||
|
||
pub mod bytecode; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
//! TrueType hinting bytecode. | ||
|
||
mod decode; | ||
mod instruction; | ||
mod opcode; | ||
|
||
pub use decode::{decode_all, DecodeError, Decoder}; | ||
pub use instruction::{InlineOperands, Instruction}; | ||
pub use opcode::Opcode; | ||
|
||
// Exported publicly for use by skrifa when the scaler_test feature is | ||
// enabled. | ||
#[cfg(any(test, feature = "scaler_test"))] | ||
pub use instruction::MockInlineOperands; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,237 @@ | ||
//! TrueType bytecode decoder. | ||
|
||
use super::{InlineOperands, Instruction, Opcode}; | ||
|
||
/// Error produced by [`Decoder::decode`] when the bytecode ends in the
/// middle of an instruction.
#[derive(Clone, Copy, Debug)]
pub struct DecodeError(());

impl std::fmt::Display for DecodeError {
    /// Writes the fixed, human readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "unexpected end of bytecode")
    }
}
|
||
/// Decodes instructions from TrueType bytecode. | ||
#[derive(Copy, Clone)] | ||
pub struct Decoder<'a> { | ||
/// The bytecode for the program. | ||
pub bytecode: &'a [u8], | ||
/// The "program counter" or current offset into the bytecode. | ||
pub pc: usize, | ||
} | ||
|
||
impl<'a> Decoder<'a> { | ||
/// Creates a new decoder for the given bytecode and program counter. | ||
pub fn new(bytecode: &'a [u8], pc: usize) -> Self { | ||
Self { bytecode, pc } | ||
} | ||
|
||
/// Decodes the next instruction. | ||
/// | ||
/// Returns `None` at the end of the bytecode stream. | ||
pub fn decode(&mut self) -> Option<Result<Instruction<'a>, DecodeError>> { | ||
let opcode = Opcode::from_byte(*self.bytecode.get(self.pc)?); | ||
Some(self.decode_inner(opcode)) | ||
} | ||
|
||
fn decode_inner(&mut self, opcode: Opcode) -> Result<Instruction<'a>, DecodeError> { | ||
let mut opcode_len = opcode.len(); | ||
let mut count_len = 0; | ||
// If the opcode length is negative the next byte contains the number | ||
// of inline operands and |opcode_len| is the size of each operand. | ||
// <https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/truetype/ttinterp.c#L7046> | ||
if opcode_len < 0 { | ||
let inline_count = *self.bytecode.get(self.pc + 1).ok_or(DecodeError(()))?; | ||
opcode_len = -opcode_len * inline_count as i32 + 2; | ||
count_len = 1; | ||
} | ||
let opcode_len = opcode_len as usize; | ||
let pc = self.pc; | ||
let next_pc = pc + opcode_len; | ||
// Skip opcode and potential inline operand count byte. | ||
let inline_start = pc + 1 + count_len; | ||
let inline_size = next_pc - inline_start; | ||
let mut inline_operands = InlineOperands::default(); | ||
if inline_size > 0 { | ||
inline_operands.bytes = self | ||
.bytecode | ||
.get(inline_start..inline_start + inline_size) | ||
.ok_or(DecodeError(()))?; | ||
inline_operands.is_words = opcode.is_push_words(); | ||
} | ||
self.pc += opcode_len; | ||
Ok(Instruction { | ||
opcode, | ||
inline_operands, | ||
pc, | ||
}) | ||
} | ||
} | ||
|
||
/// Returns an iterator that yields all instructions in the given bytecode | ||
/// starting at the specified program counter. | ||
pub fn decode_all( | ||
bytecode: &[u8], | ||
pc: usize, | ||
) -> impl Iterator<Item = Result<Instruction<'_>, DecodeError>> + '_ + Clone { | ||
let mut decoder = Decoder::new(bytecode, pc); | ||
std::iter::from_fn(move || decoder.decode()) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::Opcode;

    /// Encodes a mix of push and non-push instructions and checks that
    /// decoding yields the same opcodes and operand values.
    #[test]
    fn mixed_ops() {
        let mut enc = Encoder::default();
        // intermix push and non-push ops of various sizes to test boundary
        // conditions
        let cases: &[(Opcode, &[i16])] = &[
            (Opcode::PUSHB100, &[1, 2, 3, 255, 5]),
            (Opcode::PUSHW010, &[-1, 4508, -3]),
            (Opcode::IUP0, &[]),
            (Opcode::NPUSHB, &[55; 255]),
            (Opcode::MDRP00110, &[]),
            (Opcode::NPUSHW, &[i16::MIN; 32]),
            (Opcode::LOOPCALL, &[]),
            (Opcode::FLIPOFF, &[]),
            (
                Opcode::PUSHW011,
                &[i16::MIN, i16::MIN / 2, i16::MAX, i16::MAX / 2],
            ),
            (Opcode::GETVARIATION, &[]),
        ];
        for (opcode, values) in cases {
            if !values.is_empty() {
                enc.encode_push(values);
            } else {
                enc.encode(*opcode);
            }
        }
        let all_ins = super::decode_all(&enc.0, 0)
            .map(|ins| ins.unwrap())
            .collect::<Vec<_>>();
        // `zip` stops at the shorter side, so without this check a decoder
        // that drops trailing instructions would pass unnoticed.
        assert_eq!(all_ins.len(), cases.len());
        for (ins, (expected_opcode, expected_values)) in all_ins.iter().zip(cases) {
            assert_eq!(ins.opcode, *expected_opcode);
            let values = ins
                .inline_operands
                .values()
                .map(|v| v as i16)
                .collect::<Vec<_>>();
            assert_eq!(&values, expected_values);
        }
    }

    #[test]
    fn non_push_ops() {
        // test decoding of all single byte (non-push) opcodes
        let non_push_ops: Vec<_> = (0..=255)
            .filter(|b| !Opcode::from_byte(*b).is_push())
            .collect();
        let decoded: Vec<_> = super::decode_all(&non_push_ops, 0)
            .map(|ins| ins.unwrap().opcode as u8)
            .collect();
        assert_eq!(non_push_ops, decoded);
    }

    /// A push instruction whose count byte or operands are cut off must be
    /// reported as an error rather than as the end of the stream.
    #[test]
    fn truncated_push_is_error() {
        // NPUSHB without its operand count byte.
        let missing_count = [Opcode::NPUSHB as u8];
        assert!(matches!(
            super::decode_all(&missing_count, 0).next(),
            Some(Err(_))
        ));
        // NPUSHB announcing three operands but carrying only two.
        let missing_operand = [Opcode::NPUSHB as u8, 3, 1, 2];
        assert!(matches!(
            super::decode_all(&missing_operand, 0).next(),
            Some(Err(_))
        ));
    }

    #[test]
    fn real_bytecode() {
        // taken from NotoSerif-Regular, glyph Rturnedsmall, gid 1272
        let bytecode = [
            181, 5, 1, 9, 3, 1, 76, 75, 176, 45, 80, 88, 64, 35, 0, 3, 0, 9, 7, 3, 9, 105, 6, 4, 2,
            1, 1, 2, 97, 5, 1, 2, 2, 109, 77, 11, 8, 2, 7, 7, 0, 95, 10, 1, 0, 0, 107, 0, 78, 27,
            64, 41, 0, 7, 8, 0, 8, 7, 114, 0, 3, 0, 9, 8, 3, 9, 105, 6, 4, 2, 1, 1, 2, 97, 5, 1, 2,
            2, 109, 77, 11, 1, 8, 8, 0, 95, 10, 1, 0, 0, 107, 0, 78, 89, 64, 31, 37, 36, 1, 0, 40,
            38, 36, 44, 37, 44, 34, 32, 27, 25, 24, 23, 22, 20, 17, 16, 12, 10, 9, 8, 0, 35, 1, 35,
            12, 13, 22, 43,
        ];
        // comments below contain the ttx assembly
        let expected = [
            // PUSHB[ ] /* 6 values pushed */
            // 5 1 9 3 1 76
            "PUSHB[5] 5 1 9 3 1 76",
            // MPPEM[ ] /* MeasurePixelPerEm */
            "MPPEM",
            // PUSHB[ ] /* 1 value pushed */
            // 45
            "PUSHB[0] 45",
            // LT[ ] /* LessThan */
            "LT",
            // IF[ ] /* If */
            "IF",
            // NPUSHB[ ] /* 35 values pushed */
            // 0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7
            // 7 0 95 10 1 0 0 107 0 78
            "NPUSHB 0 3 0 9 7 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 8 2 7 7 0 95 10 1 0 0 107 0 78",
            // ELSE[ ] /* Else */
            "ELSE",
            // NPUSHB[ ] /* 41 values pushed */
            // 0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2
            // 2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78
            "NPUSHB 0 7 8 0 8 7 114 0 3 0 9 8 3 9 105 6 4 2 1 1 2 97 5 1 2 2 109 77 11 1 8 8 0 95 10 1 0 0 107 0 78",
            // EIF[ ] /* EndIf */
            "EIF",
            // NPUSHB[ ] /* 31 values pushed */
            // 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0
            // 35 1 35 12 13 22
            "NPUSHB 37 36 1 0 40 38 36 44 37 44 34 32 27 25 24 23 22 20 17 16 12 10 9 8 0 35 1 35 12 13 22",
            // CALL[ ] /* CallFunction */
            "CALL",
        ];
        let decoded: Vec<_> = super::decode_all(&bytecode, 0)
            .map(|ins| ins.unwrap())
            .collect();
        let decoded_asm: Vec<_> = decoded.iter().map(|ins| ins.to_string()).collect();
        assert_eq!(decoded_asm, expected);
    }

    /// Simple encoder used for testing.
    #[derive(Default)]
    struct Encoder(Vec<u8>);

    impl Encoder {
        /// Appends a single byte, non-push instruction.
        pub fn encode(&mut self, opcode: Opcode) {
            assert!(!opcode.is_push(), "use the encode_push method instead");
            self.0.push(opcode as u8);
        }

        /// Appends one push instruction carrying `values`.
        ///
        /// Does nothing for an empty slice. Byte instructions are used when
        /// every value fits in a `u8`, word instructions otherwise. Up to
        /// eight values use the short `PUSHB[n]`/`PUSHW[n]` forms and longer
        /// lists use `NPUSHB`/`NPUSHW` with an explicit count byte.
        ///
        /// Panics if more than 255 values are given.
        pub fn encode_push(&mut self, values: &[i16]) {
            if values.is_empty() {
                return;
            }
            assert!(
                values.len() < 256,
                "too many values to push in a single instruction"
            );
            let count = values.len() as u8;
            let is_bytes = values
                .iter()
                .all(|value| (0..=i16::from(u8::MAX)).contains(value));
            let (short_base, counted) = if is_bytes {
                (Opcode::PUSHB000, Opcode::NPUSHB)
            } else {
                (Opcode::PUSHW000, Opcode::NPUSHW)
            };
            if values.len() <= 8 {
                // The short forms encode `count - 1` in the opcode itself.
                self.0.push(short_base as u8 + count - 1);
            } else {
                self.0.push(counted as u8);
                self.0.push(count);
            }
            if is_bytes {
                self.0.extend(values.iter().map(|&value| value as u8));
            } else {
                // Words are stored high byte first.
                self.0
                    .extend(values.iter().flat_map(|value| value.to_be_bytes()));
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
//! Decoded representation of a TrueType instruction. | ||
use super::Opcode; | ||
|
||
/// Decoded TrueType instruction. | ||
#[derive(Copy, Clone, Debug)] | ||
pub struct Instruction<'a> { | ||
/// Operation code. | ||
pub opcode: Opcode, | ||
/// Instruction operands that were decoded from the bytecode. | ||
pub inline_operands: InlineOperands<'a>, | ||
/// Program counter -- offset into the bytecode where this | ||
/// instruction was decoded. | ||
pub pc: usize, | ||
} | ||
|
||
impl std::fmt::Display for Instruction<'_> { | ||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { | ||
write!(f, "{}", self.opcode.name())?; | ||
for value in self.inline_operands.values() { | ||
write!(f, " {value}")?; | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
/// Sequence of instruction operands that are encoded directly in the bytecode. | ||
/// | ||
/// This is only used for push instructions. | ||
#[derive(Copy, Clone, Default, Debug)] | ||
pub struct InlineOperands<'a> { | ||
pub(super) bytes: &'a [u8], | ||
pub(super) is_words: bool, | ||
} | ||
|
||
impl<'a> InlineOperands<'a> { | ||
/// Returns the number of operands. | ||
#[inline] | ||
pub fn len(&self) -> usize { | ||
if self.is_words { | ||
self.bytes.len() / 2 | ||
} else { | ||
self.bytes.len() | ||
} | ||
} | ||
|
||
/// Returns true if there are no operands. | ||
pub fn is_empty(&self) -> bool { | ||
self.bytes.is_empty() | ||
} | ||
|
||
/// Returns an iterator over the operand values. | ||
#[inline] | ||
pub fn values(&self) -> impl Iterator<Item = i32> + 'a + Clone { | ||
let (bytes, words) = if self.is_words { | ||
(&[][..], self.bytes) | ||
} else { | ||
(self.bytes, &[][..]) | ||
}; | ||
bytes | ||
.iter() | ||
.map(|byte| *byte as u32 as i32) | ||
.chain(words.chunks_exact(2).map(|chunk| { | ||
let word = ((chunk[0] as u16) << 8) | chunk[1] as u16; | ||
// Double cast to ensure sign extension | ||
word as i16 as i32 | ||
})) | ||
} | ||
} | ||
|
||
/// Mock for testing inline operands.
///
/// Owns the operand bytes so that an [`InlineOperands`] borrowing them can
/// be created without decoding real bytecode.
#[cfg(any(test, feature = "scaler_test"))]
#[derive(Clone, Debug)]
pub struct MockInlineOperands {
    /// Encoded operand bytes.
    bytes: Vec<u8>,
    /// Whether `bytes` holds two byte words rather than single bytes.
    is_words: bool,
}

#[cfg(any(test, feature = "scaler_test"))]
impl MockInlineOperands {
    /// Creates a mock holding byte sized operands.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Self {
            bytes: bytes.to_vec(),
            is_words: false,
        }
    }

    /// Creates a mock holding word sized operands, storing each word high
    /// byte first.
    pub fn from_words(words: &[i16]) -> Self {
        Self {
            // `to_be_bytes` yields a fixed size array, avoiding a heap
            // allocation per word.
            bytes: words.iter().flat_map(|word| word.to_be_bytes()).collect(),
            is_words: true,
        }
    }

    /// Returns inline operands that borrow from this mock.
    pub fn operands(&self) -> InlineOperands<'_> {
        InlineOperands {
            bytes: &self.bytes,
            is_words: self.is_words,
        }
    }
}
|
||
#[cfg(test)]
mod tests {
    use super::MockInlineOperands;

    /// Byte operands decode to the same values, widened to `i32`.
    #[test]
    fn byte_operands() {
        let values = [5, 2, 85, 92, 26, 42, u8::MIN, u8::MAX];
        let expected: Vec<i32> = values.iter().map(|&byte| byte as i32).collect();
        let decoded: Vec<i32> = MockInlineOperands::from_bytes(&values)
            .operands()
            .values()
            .collect();
        assert!(decoded == expected);
    }

    /// Word operands decode to the same signed values, widened to `i32`.
    #[test]
    fn word_operands() {
        let values = [-5, 2, 2845, 92, -26, 42, i16::MIN, i16::MAX];
        let expected: Vec<i32> = values.iter().map(|&word| word as i32).collect();
        let decoded: Vec<i32> = MockInlineOperands::from_words(&values)
            .operands()
            .values()
            .collect();
        assert!(decoded == expected);
    }
}
Oops, something went wrong.