diff --git a/src/yaz0.rs b/src/yaz0.rs index 1e6d400..6123576 100644 --- a/src/yaz0.rs +++ b/src/yaz0.rs @@ -1,5 +1,7 @@ // Based on https://gist.github.com/Mr-Wiseguy/6cca110d74b32b5bb19b76cfa2d7ab4f +use std::cmp; + use crate::utils; pub fn decompress_yaz0(bytes: &[u8]) -> Box<[u8]> { @@ -60,6 +62,265 @@ pub fn decompress_yaz0(bytes: &[u8]) -> Box<[u8]> { ret.into_boxed_slice() } -pub fn compress_yaz0(_bytes: &[u8]) -> Box<[u8]> { - panic!("Not implemented") +fn divide_round_up(a: usize, b: usize) -> usize { + (a + b - 1) / b +} + +pub fn compress_yaz0(bytes: &[u8]) -> Box<[u8]> { + let input_size = bytes.len(); + // Worst-case size for output is zero compression on the input, meaning the input size plus the number of layout bytes plus the Yaz0 header. + // There would be one layout byte for every 8 input bytes, so the worst-case size is: + // input_size + ROUND_UP_DIVIDE(input_size, 8) + 0x10 + let mut output: Vec = + Vec::with_capacity(input_size + divide_round_up(input_size, 8) + 0x10); + + output.extend(b"Yaz0"); + output.extend((input_size as u32).to_be_bytes()); + // padding + output.extend(&[0, 0, 0, 0, 0, 0, 0, 0]); + + output.push(0); + let mut index_cur_layout_byte: usize = 0x10; + let mut index_out_ptr: usize = index_cur_layout_byte + 1; + let mut input_pos: usize = 0; + let mut cur_layout_bit: u8 = 0x80; + + while input_pos < input_size { + let mut group_pos: i32 = 0; + let mut group_size: u32 = 0; + + search( + input_pos, + input_size, + &mut group_pos, + &mut group_size, + bytes, + ); + + // If the group isn't larger than 2 bytes, copying the input without compression is smaller + if group_size <= 2 { + // Set the current layout bit to indicate that this is an uncompressed byte + output[index_cur_layout_byte] |= cur_layout_bit; + output.push(bytes[input_pos]); + input_pos += 1; + index_out_ptr += 1; + } else { + let mut new_size: u32 = 0; + let mut new_position: i32 = 0; + + // Search for a new group after one position after the current one + search( + input_pos + 1, + input_size, + &mut new_position, + &mut new_size, + bytes, + ); + + // If the new group is better than the current group by at least 2 bytes, use it one instead + if new_size >= group_size + 2 { + // Mark the current layout bit to skip compressing this byte, as the next input position yielded better compression + output[index_cur_layout_byte] |= cur_layout_bit; + // Copy the input byte to the output + output.push(bytes[input_pos]); + input_pos += 1; + index_out_ptr += 1; + + // Advance to the next layout bit + cur_layout_bit >>= 1; + + if cur_layout_bit == 0 { + cur_layout_bit = 0x80; + index_cur_layout_byte = index_out_ptr; + index_out_ptr += 1; + output[index_cur_layout_byte] = 0; + } + + group_size = new_size; + group_pos = new_position; + } + + // Calculate the offset for the current group + let group_offset: u32 = (input_pos as i32 - group_pos - 1) as u32; + + // Determine which encoding to use for the current group + if group_size >= 0x12 { + // Three bytes, 0RRRNN + output.push((group_offset >> 8) as u8); + index_out_ptr += 1; + output.push((group_offset & 0xFF) as u8); + index_out_ptr += 1; + output.push((group_size - 0x12) as u8); + index_out_ptr += 1; + } else { + // Two bytes, NRRR + output.push((group_offset >> 8) as u8 | ((group_size - 2) << 4) as u8); + index_out_ptr += 1; + output.push((group_offset & 0xFF) as u8); + index_out_ptr += 1; + } + + // Move forward in the input by the size of the group + input_pos += group_size as usize; + } + + // Advance to the next layout bit + cur_layout_bit >>= 1; + + if cur_layout_bit == 0 { + cur_layout_bit = 0x80; + index_cur_layout_byte = index_out_ptr; + output.push(0); + index_out_ptr += 1; + } + } + + output.shrink_to_fit(); + output.into_boxed_slice() +} + +fn search( + input_pos: usize, + input_size: usize, + pos_out: &mut i32, + size_out: &mut u32, + data_in: &[u8], +) { + let mut cur_size: usize = 3; + let mut found_pos: isize = 0; + let mut search_pos: usize = cmp::max(input_pos as isize - 0x1000, 0) as usize; + let search_size = cmp::min(input_size - input_pos, 0x111); + + if search_size >= 3 { + while search_pos < input_pos { + let found_offset = mischarsearch( + &data_in[input_pos..], + cur_size, + &data_in[search_pos..], + cur_size + input_pos - search_pos, + ); + + if found_offset >= input_pos - search_pos { + break; + } + + while cur_size < search_size { + if data_in[cur_size + search_pos + found_offset] != data_in[cur_size + input_pos] { + break; + } + cur_size += 1; + } + + if search_size == cur_size { + *pos_out = (found_offset + search_pos) as i32; + *size_out = cur_size as u32; + return; + } + + found_pos = (search_pos + found_offset) as isize; + search_pos = (found_pos + 1) as usize; + cur_size += 1; + } + + *pos_out = found_pos as i32; + if cur_size > 3 { + cur_size -= 1; + *size_out = cur_size as u32; + return; + } + } else { + *pos_out = 0; + } + *size_out = 0; +} + +fn mischarsearch(pattern: &[u8], pattern_len: usize, data: &[u8], data_len: usize) -> usize { + let mut skip_table = [0u16; 256]; + let mut i: isize; + //let mut k: usize; + + let mut v6: isize; + let mut j: isize; + + if pattern_len <= data_len { + initskip(pattern, pattern_len as i32, &mut skip_table); + + i = pattern_len as isize - 1; + loop { + if pattern[pattern_len - 1] == data[i as usize] { + i -= 1; + j = pattern_len as isize - 2; + if j < 0 { + return (i + 1) as usize; + } + + while pattern[j as usize] == data[i as usize] { + i -= 1; + j -= 1; + if j < 0 { + return (i + 1) as usize; + } + } + + v6 = pattern_len as isize - j; + + if skip_table[data[i as usize] as usize] as isize > v6 { + v6 = skip_table[data[i as usize] as usize] as isize; + } + } else { + v6 = skip_table[data[i as usize] as usize] as isize; + } + i += v6; + } + } + data_len +} + +fn initskip(pattern: &[u8], len: i32, skip: &mut [u16; 256]) { + skip.fill(len as u16); + + for i in 0..len { + skip[pattern[i as usize] as usize] = (len - i - 1) as u16; + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_matching_decompression() { + let compressed_file = include_bytes!("../test_data/Yaz0/1.Yaz0"); + let decompressed_file = include_bytes!("../test_data/Yaz0/1.bin"); + + let decompressed: Box<[u8]> = super::decompress_yaz0(compressed_file); + assert_eq!(decompressed_file, decompressed.as_ref()); + } + + #[test] + fn test_matching_compression() { + let compressed_file = include_bytes!("../test_data/Yaz0/1.Yaz0"); + let decompressed_file = include_bytes!("../test_data/Yaz0/1.bin"); + + let compressed = super::compress_yaz0(decompressed_file.as_slice()); + assert_eq!(compressed_file, compressed.as_ref()); + } + + #[test] + fn test_cycle_decompressed() { + let decompressed_file = include_bytes!("../test_data/Yaz0/1.bin"); + + assert_eq!( + decompressed_file, + super::decompress_yaz0(&super::compress_yaz0(decompressed_file.as_ref())).as_ref() + ); + } + + #[test] + fn test_cycle_compressed() { + let compressed_file = include_bytes!("../test_data/Yaz0/1.Yaz0"); + + assert_eq!( + compressed_file, + super::compress_yaz0(&super::decompress_yaz0(compressed_file.as_ref())).as_ref() + ); + } } diff --git a/test_data/.gitignore b/test_data/.gitignore index ed45cff..bfd1ca9 100644 --- a/test_data/.gitignore +++ b/test_data/.gitignore @@ -1,2 +1,3 @@ -!*.Yay0 !*.bin +!*.Yay0 +!*.Yaz0 diff --git a/test_data/Yaz0/1.Yaz0 b/test_data/Yaz0/1.Yaz0 new file mode 100644 index 0000000..1270e8b Binary files /dev/null and b/test_data/Yaz0/1.Yaz0 differ diff --git a/test_data/Yaz0/1.bin b/test_data/Yaz0/1.bin new file mode 100755 index 0000000..b9559a5 --- /dev/null +++ b/test_data/Yaz0/1.bin @@ -0,0 +1 @@ +ASCII文字と互換性を持たせるために、ASCIIと同じ部分は1バイト=asdfasdfasdfasdfasdfasdfasdfasdfasdf \ No newline at end of file