Skip to content

Commit

Permalink
remove no longer used stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
a10y committed Aug 14, 2024
1 parent 54deb7f commit 0fce563
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 164 deletions.
2 changes: 1 addition & 1 deletion src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::cmp::Ordering;
use std::collections::BinaryHeap;

use crate::find_longest::FindLongestSymbol;
use crate::{CodeMeta, Symbol, SymbolTable, MAX_CODE};
use crate::{Symbol, SymbolTable, MAX_CODE};

#[derive(Debug, Clone)]
struct Counter {
Expand Down
2 changes: 0 additions & 2 deletions src/find_longest/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::CodeMeta;

mod naive;

pub trait FindLongestSymbol {
Expand Down
2 changes: 1 addition & 1 deletion src/find_longest/naive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

use crate::find_longest::FindLongestSymbol;
use crate::{CodeMeta, SymbolTable};
use crate::SymbolTable;

// Find the code that maps to a symbol with longest-match to a piece of text.
//
Expand Down
64 changes: 4 additions & 60 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#![allow(unused)]
#![doc = include_str!("../README.md")]

/// Throw a compiler error if a type isn't guaranteed to have a specific size in bytes.
Expand Down Expand Up @@ -184,34 +183,13 @@ impl CodeMeta {
Self(value)
}

/// Build the code used for an escaped byte.
///
/// Delegates to `Self::new`, passing `byte` as the code value, `true` as the flag
/// argument and a length of 1.
fn new_escaped(byte: u8) -> Self {
    let escaped_len = 1;
    Self::new(byte, true, escaped_len)
}

/// Build the code that refers to a [`Symbol`], recording the symbol's length.
///
/// # Panics
/// Panics when `code` equals `ESCAPE_CODE`, which may not be assigned to a symbol.
fn new_symbol(code: u8, symbol: Symbol) -> Self {
    assert_ne!(code, ESCAPE_CODE, "ESCAPE_CODE cannot be used for symbol");

    let symbol_len = symbol.len() as u16;
    Self::new(code, false, symbol_len)
}

/// Wrap an already-packed `u16` value without re-encoding it.
///
/// # Panics
/// Panics if the upper four bits of `code` (`code >> 12`, the packed length) exceed 8.
fn from_u16(code: u16) -> Self {
    let packed_len = code >> 12;
    assert!(packed_len <= 8, "len must be <= 8");

    Self(code)
}

/// Returns true if the code is for an escape byte.
///
/// The check is purely numeric: the packed value must fit in the low byte (`<= 255`).
///
/// NOTE(review): `len()` reads `self.0 >> 12`, so any code packed with a non-zero length
/// is larger than 255 and can never satisfy this test — confirm this matches how escape
/// codes are actually packed.
#[inline]
fn is_escape(&self) -> bool {
    self.0 <= u16::from(u8::MAX)
}

#[inline]
fn code(&self) -> u8 {
self.0 as u8
Expand All @@ -226,11 +204,6 @@ impl CodeMeta {
fn len(&self) -> u16 {
self.0 >> 12
}

/// Returns the raw packed `u16` held by this value, unchanged.
#[inline]
fn as_u16(&self) -> u16 {
self.0
}
}

impl Debug for CodeMeta {
Expand Down Expand Up @@ -284,7 +257,7 @@ impl Default for SymbolTable {
let mut table = Self {
symbols: [Symbol::ZERO; 511],
n_symbols: 0,
codes_twobyte: Vec::with_capacity(65_536),
codes_twobyte: vec![CodeMeta::EMPTY; 65_536],
lossy_pht: LossyPHT::new(),
};

Expand All @@ -293,16 +266,6 @@ impl Default for SymbolTable {
table.symbols[byte as usize] = Symbol::from_u8(byte);
}

// Populate the "codes" for twobytes to default to the escape sequence
// for the first byte
for first in 0..256 {
for _second in 0..256 {
// let default_code = CodeMeta::new_escaped(first as u8);
// table.codes_twobyte.push(default_code);
table.codes_twobyte.push(CodeMeta::EMPTY)
}
}

table
}
}
Expand Down Expand Up @@ -355,12 +318,7 @@ impl SymbolTable {
///
/// `out_ptr` must never be NULL or otherwise point to invalid memory.
#[inline(never)]
pub(crate) unsafe fn compress_word(
&self,
word: u64,
out_ptr: *mut u8,
out_start: *mut u8,
) -> (usize, usize) {
pub(crate) unsafe fn compress_word(&self, word: u64, out_ptr: *mut u8) -> (usize, usize) {
// Speculatively write the first byte of `word` at offset 1. This is necessary if it is an escape, and
// if it isn't, it will be overwritten anyway.
//
Expand Down Expand Up @@ -412,7 +370,6 @@ impl SymbolTable {

let mut in_ptr = plaintext.as_ptr();
let mut out_ptr = values.as_mut_ptr();
let out_start = values.as_mut_ptr();

// SAFETY: `in_end` will point just after the end of the `plaintext` slice.
let in_end = unsafe { in_ptr.byte_add(plaintext.len()) };
Expand All @@ -426,7 +383,7 @@ impl SymbolTable {
// Load a full 8-byte word of data from in_ptr.
// SAFETY: caller asserts in_ptr is not null. we may read past end of pointer though.
let word: u64 = (in_ptr as *const u64).read_unaligned();
let (advance_in, advance_out) = self.compress_word(word, out_ptr, out_start);
let (advance_in, advance_out) = self.compress_word(word, out_ptr);
in_ptr = in_ptr.byte_add(advance_in);
out_ptr = out_ptr.byte_add(advance_out);
};
Expand All @@ -446,7 +403,7 @@ impl SymbolTable {
unsafe {
// Load a full 8-byte word of data from in_ptr.
// SAFETY: caller asserts in_ptr is not null. we may read past end of pointer though.
let (advance_in, advance_out) = self.compress_word(last_word, out_ptr, out_start);
let (advance_in, advance_out) = self.compress_word(last_word, out_ptr);
in_ptr = in_ptr.byte_add(advance_in);
out_ptr = out_ptr.byte_add(advance_out);

Expand Down Expand Up @@ -541,19 +498,6 @@ fn advance_8byte_word(word: u64, bytes: usize) -> u64 {
}
}

/// Discard the first `bits` bits of `word` and return what is left.
///
/// The word is shifted off the right-end, because little endian means the first
/// char is stored in the LSB.
///
/// Any `bits >= 64` yields `0`. A bare `word >> bits` cannot be used for those values:
/// shifting a `u64` by its full width or more is an overflow, which panics in debug
/// builds and wraps the shift amount in release builds.
fn advance_8byte_word_bits(word: u64, bits: usize) -> u64 {
    // The original `bits == 64` form was observed to compile to a conditional move
    // rather than a branch (see `<https://godbolt.org/z/Pbvre65Pq>`); the `>=`
    // comparison is expected to lower the same way.
    if bits >= 64 {
        0
    } else {
        word >> bits
    }
}

fn compare_masked(left: u64, right: u64, ignored_bits: u16) -> bool {
let mask = if ignored_bits == 64 {
0
Expand Down
100 changes: 0 additions & 100 deletions src/lossy_pht.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::fmt::Debug;
use std::fmt::Formatter;

use crate::CodeMeta;
use crate::Symbol;
Expand All @@ -12,94 +11,6 @@ use crate::MAX_CODE;
/// vendors split the L1 cache into 32KB of instruction and 32KB of data.
pub const HASH_TABLE_SIZE: usize = 1 << 11;

/// Bit-packed metadata for a [`TableEntry`].
///
/// Bitpacked layout of the inner `u16`:
///
/// - bits 9-15: ignored bits in the symbol. Equivalent to `64 - symbol.len() * 8`
/// - bit 8: the "unused" flag
/// - bits 0-7: code value (0-254)
#[derive(Clone, Copy)]
#[repr(C)]
pub(crate) struct PackedMeta(u16);

assert_sizeof!(PackedMeta => 2);

impl PackedMeta {
    /// Constant unused instance.
    ///
    /// The bit pattern decodes as:
    ///
    /// - bits 9-15: `0b1000000`, i.e. 64 ignored bits
    /// - bit 8: set, the `unused` flag
    /// - bits 0-7: `0xFF` in the code field
    pub const UNUSED: Self = Self(0b10000001_11111111);

    /// Bit 8 toggles if the slot is unused or not.
    const UNUSED_FLAG: u16 = 1 << 8;

    /// Create a new `PackedMeta` from raw parts.
    ///
    /// `len` is stored as a count of ignored bits, `64 - 8 * len`, in bits 9-15 and `code`
    /// is stored in bits 0-7. The `unused` flag (bit 8) is left clear.
    ///
    /// # Panics
    /// If `len` > 8.
    pub fn new(len: u16, code: u8) -> Self {
        assert!(len <= 8, "cannot construct PackedCode with len > 8");

        let ignored_bits = 64 - 8 * len;

        // `u16::from` is the lossless widening of the 8-bit code.
        Self((ignored_bits << 9) | u16::from(code))
    }

    /// Import a `PackedMeta` from a raw `u16`.
    ///
    /// # Panics
    /// If the ignored-bits field (`value >> 9`) is greater than 64.
    pub fn from_u16(value: u16) -> Self {
        // The field checked here is the ignored-bit count, not the length, so the
        // message names the quantity that is actually out of range.
        assert!(
            (value >> 9) <= 64,
            "cannot construct PackedCode with ignored bits > 64"
        );

        Self(value)
    }

    /// Get the number of ignored bits in the corresponding symbol's `u64` representation.
    ///
    /// Always <= 64 for values built through [`PackedMeta::new`] or [`PackedMeta::from_u16`].
    #[inline]
    pub(crate) fn ignored_bits(&self) -> u16 {
        self.0 >> 9
    }

    /// Get the code value (the low 8 bits).
    #[inline]
    pub(crate) fn code(&self) -> u8 {
        // Truncation is intentional: only bits 0-7 hold the code.
        self.0 as u8
    }

    /// Check if the unused flag is set.
    #[inline]
    pub(crate) fn is_unused(&self) -> bool {
        (self.0 & Self::UNUSED_FLAG) != 0
    }
}

impl Default for PackedMeta {
fn default() -> Self {
// The default `PackedMeta` is the `UNUSED` constant, representing an unused slot in the
// table. Note that `UNUSED` sets more than just the `UNUSED_FLAG` bit.
Self::UNUSED
}
}

impl Debug for PackedMeta {
    /// Formats the value as a `PackedCode` struct exposing the decoded `ignored_bits`
    /// and `code` fields.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let ignored_bits = self.ignored_bits();
        let code = self.code();

        let mut out = f.debug_struct("PackedCode");
        out.field("ignored_bits", &ignored_bits);
        out.field("code", &code);
        out.finish()
    }
}

/// A single entry in the [`SymbolTable`].
///
/// `TableEntry` is based on the `Symbol` class outlined in Algorithm 4 of the FSST paper. See
Expand Down Expand Up @@ -205,14 +116,3 @@ impl Default for LossyPHT {
Self::new()
}
}

#[cfg(test)]
mod test {
    use crate::lossy_pht::PackedMeta;

    /// The `UNUSED` constant must report itself as unused and carry 64 ignored bits.
    #[test]
    fn test_packedmeta() {
        let sentinel = PackedMeta::UNUSED;

        assert!(sentinel.is_unused());
        assert_eq!(sentinel.ignored_bits(), 64);
    }
}

0 comments on commit 0fce563

Please sign in to comment.