Skip to content

Commit

Permalink
Optimize DenseBinaryMatrix
Browse files (browse the repository at this point in the history)
Switch to a single contiguous vector instead of a vector of vectors

This improves performance by ~5%, especially for smaller symbol counts
  • Loading branch information
cberner committed Dec 8, 2020
1 parent 7b0d1c5 commit 102c6a5
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 82 deletions.
62 changes: 31 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,16 @@ The following were run on an Intel Core i5-6600K @ 3.50GHz

```
Symbol size: 1280 bytes (without pre-built plan)
symbol count = 10, encoded 127 MB in 0.484secs, throughput: 2115.5Mbit/s
symbol count = 100, encoded 127 MB in 0.509secs, throughput: 2010.7Mbit/s
symbol count = 250, encoded 127 MB in 0.482secs, throughput: 2122.3Mbit/s
symbol count = 500, encoded 127 MB in 0.463secs, throughput: 2204.1Mbit/s
symbol count = 1000, encoded 126 MB in 0.492secs, throughput: 2064.3Mbit/s
symbol count = 2000, encoded 126 MB in 0.565secs, throughput: 1797.6Mbit/s
symbol count = 5000, encoded 122 MB in 0.594secs, throughput: 1644.0Mbit/s
symbol count = 10000, encoded 122 MB in 0.716secs, throughput: 1363.9Mbit/s
symbol count = 20000, encoded 122 MB in 1.059secs, throughput: 922.2Mbit/s
symbol count = 50000, encoded 122 MB in 1.508secs, throughput: 647.6Mbit/s
symbol count = 10, encoded 127 MB in 0.465secs, throughput: 2202.0Mbit/s
symbol count = 100, encoded 127 MB in 0.483secs, throughput: 2118.9Mbit/s
symbol count = 250, encoded 127 MB in 0.474secs, throughput: 2158.1Mbit/s
symbol count = 500, encoded 127 MB in 0.460secs, throughput: 2218.5Mbit/s
symbol count = 1000, encoded 126 MB in 0.490secs, throughput: 2072.7Mbit/s
symbol count = 2000, encoded 126 MB in 0.562secs, throughput: 1807.2Mbit/s
symbol count = 5000, encoded 122 MB in 0.578secs, throughput: 1689.6Mbit/s
symbol count = 10000, encoded 122 MB in 0.687secs, throughput: 1421.5Mbit/s
symbol count = 20000, encoded 122 MB in 1.019secs, throughput: 958.4Mbit/s
symbol count = 50000, encoded 122 MB in 1.432secs, throughput: 682.0Mbit/s
Symbol size: 1280 bytes (with pre-built plan)
symbol count = 10, encoded 127 MB in 0.220secs, throughput: 4654.2Mbit/s
Expand All @@ -47,27 +47,27 @@ symbol count = 20000, encoded 122 MB in 0.427secs, throughput: 2287.0Mbit/s
symbol count = 50000, encoded 122 MB in 0.540secs, throughput: 1808.4Mbit/s
Symbol size: 1280 bytes
symbol count = 10, decoded 127 MB in 0.706secs using 0.0% overhead, throughput: 1450.3Mbit/s
symbol count = 100, decoded 127 MB in 0.619secs using 0.0% overhead, throughput: 1653.4Mbit/s
symbol count = 250, decoded 127 MB in 0.568secs using 0.0% overhead, throughput: 1801.0Mbit/s
symbol count = 500, decoded 127 MB in 0.560secs using 0.0% overhead, throughput: 1822.3Mbit/s
symbol count = 1000, decoded 126 MB in 0.601secs using 0.0% overhead, throughput: 1689.9Mbit/s
symbol count = 2000, decoded 126 MB in 0.670secs using 0.0% overhead, throughput: 1515.9Mbit/s
symbol count = 5000, decoded 122 MB in 0.767secs using 0.0% overhead, throughput: 1273.2Mbit/s
symbol count = 10000, decoded 122 MB in 0.970secs using 0.0% overhead, throughput: 1006.8Mbit/s
symbol count = 20000, decoded 122 MB in 1.222secs using 0.0% overhead, throughput: 799.2Mbit/s
symbol count = 50000, decoded 122 MB in 2.046secs using 0.0% overhead, throughput: 477.3Mbit/s
symbol count = 10, decoded 127 MB in 0.698secs using 5.0% overhead, throughput: 1466.9Mbit/s
symbol count = 100, decoded 127 MB in 0.617secs using 5.0% overhead, throughput: 1658.7Mbit/s
symbol count = 250, decoded 127 MB in 0.565secs using 5.0% overhead, throughput: 1810.5Mbit/s
symbol count = 500, decoded 127 MB in 0.545secs using 5.0% overhead, throughput: 1872.5Mbit/s
symbol count = 1000, decoded 126 MB in 0.563secs using 5.0% overhead, throughput: 1804.0Mbit/s
symbol count = 2000, decoded 126 MB in 0.599secs using 5.0% overhead, throughput: 1695.5Mbit/s
symbol count = 5000, decoded 122 MB in 0.689secs using 5.0% overhead, throughput: 1417.4Mbit/s
symbol count = 10000, decoded 122 MB in 0.881secs using 5.0% overhead, throughput: 1108.5Mbit/s
symbol count = 20000, decoded 122 MB in 1.117secs using 5.0% overhead, throughput: 874.3Mbit/s
symbol count = 50000, decoded 122 MB in 1.848secs using 5.0% overhead, throughput: 528.4Mbit/s
symbol count = 10, decoded 127 MB in 0.679secs using 0.0% overhead, throughput: 1508.0Mbit/s
symbol count = 100, decoded 127 MB in 0.583secs using 0.0% overhead, throughput: 1755.5Mbit/s
symbol count = 250, decoded 127 MB in 0.564secs using 0.0% overhead, throughput: 1813.7Mbit/s
symbol count = 500, decoded 127 MB in 0.539secs using 0.0% overhead, throughput: 1893.3Mbit/s
symbol count = 1000, decoded 126 MB in 0.571secs using 0.0% overhead, throughput: 1778.7Mbit/s
symbol count = 2000, decoded 126 MB in 0.708secs using 0.0% overhead, throughput: 1434.5Mbit/s
symbol count = 5000, decoded 122 MB in 0.769secs using 0.0% overhead, throughput: 1269.9Mbit/s
symbol count = 10000, decoded 122 MB in 0.902secs using 0.0% overhead, throughput: 1082.7Mbit/s
symbol count = 20000, decoded 122 MB in 1.135secs using 0.0% overhead, throughput: 860.4Mbit/s
symbol count = 50000, decoded 122 MB in 1.929secs using 0.0% overhead, throughput: 506.3Mbit/s
symbol count = 10, decoded 127 MB in 0.669secs using 5.0% overhead, throughput: 1530.5Mbit/s
symbol count = 100, decoded 127 MB in 0.582secs using 5.0% overhead, throughput: 1758.5Mbit/s
symbol count = 250, decoded 127 MB in 0.550secs using 5.0% overhead, throughput: 1859.9Mbit/s
symbol count = 500, decoded 127 MB in 0.520secs using 5.0% overhead, throughput: 1962.5Mbit/s
symbol count = 1000, decoded 126 MB in 0.548secs using 5.0% overhead, throughput: 1853.3Mbit/s
symbol count = 2000, decoded 126 MB in 0.582secs using 5.0% overhead, throughput: 1745.1Mbit/s
symbol count = 5000, decoded 122 MB in 0.658secs using 5.0% overhead, throughput: 1484.1Mbit/s
symbol count = 10000, decoded 122 MB in 0.835secs using 5.0% overhead, throughput: 1169.5Mbit/s
symbol count = 20000, decoded 122 MB in 1.105secs using 5.0% overhead, throughput: 883.8Mbit/s
symbol count = 50000, decoded 122 MB in 1.784secs using 5.0% overhead, throughput: 547.4Mbit/s
```

### Public API
Expand Down
49 changes: 35 additions & 14 deletions src/iterators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ pub struct ClonedOctetIter {
end_col: usize,
dense_elements: Option<Vec<u64>>,
dense_index: usize,
dense_word_index: usize,
dense_bit_index: usize,
sparse_elements: Option<Vec<(usize, Octet)>>,
sparse_index: usize,
}
Expand All @@ -29,16 +31,20 @@ impl Iterator for ClonedOctetIter {
return None;
} else {
let old_index = self.dense_index;
self.dense_index += 1;
let (word, bit) = DenseBinaryMatrix::bit_position(old_index);
let value = if self.dense_elements.as_ref().unwrap()[word]
& DenseBinaryMatrix::select_mask(bit)
let value = if self.dense_elements.as_ref().unwrap()[self.dense_word_index]
& DenseBinaryMatrix::select_mask(self.dense_bit_index)
== 0
{
Octet::zero()
} else {
Octet::one()
};
self.dense_index += 1;
self.dense_bit_index += 1;
if self.dense_bit_index == 64 {
self.dense_bit_index = 0;
self.dense_word_index += 1;
}
return Some((old_index, value));
}
}
Expand All @@ -49,8 +55,10 @@ pub struct OctetIter<'a> {
sparse: bool,
start_col: usize,
end_col: usize,
dense_elements: Option<&'a Vec<u64>>,
dense_elements: Option<&'a [u64]>,
dense_index: usize,
dense_word_index: usize,
dense_bit_index: usize,
sparse_elements: Option<&'a SparseBinaryVec>,
sparse_index: usize,
sparse_physical_col_to_logical: Option<&'a [u16]>,
Expand All @@ -69,6 +77,8 @@ impl<'a> OctetIter<'a> {
end_col,
dense_elements: None,
dense_index: 0,
dense_word_index: 0,
dense_bit_index: 0,
sparse_elements: Some(sparse_elements),
sparse_index: 0,
sparse_physical_col_to_logical: Some(sparse_physical_col_to_logical),
Expand All @@ -79,14 +89,17 @@ impl<'a> OctetIter<'a> {
pub fn new_dense_binary(
start_col: usize,
end_col: usize,
dense_elements: &'a Vec<u64>,
start_bit: usize,
dense_elements: &'a [u64],
) -> OctetIter<'a> {
OctetIter {
sparse: false,
start_col: 0,
end_col,
dense_elements: Some(dense_elements),
dense_index: start_col,
dense_word_index: 0,
dense_bit_index: start_bit,
sparse_elements: None,
sparse_index: 0,
sparse_physical_col_to_logical: None,
Expand All @@ -111,8 +124,10 @@ impl<'a> OctetIter<'a> {
ClonedOctetIter {
sparse: self.sparse,
end_col: self.end_col,
dense_elements: self.dense_elements.cloned(),
dense_elements: self.dense_elements.map(|x| x.to_vec()),
dense_index: self.dense_index,
dense_word_index: self.dense_word_index,
dense_bit_index: self.dense_bit_index,
sparse_elements,
sparse_index: self.sparse_index,
}
Expand Down Expand Up @@ -144,13 +159,19 @@ impl<'a> Iterator for OctetIter<'a> {
} else {
let old_index = self.dense_index;
self.dense_index += 1;
let (word, bit) = DenseBinaryMatrix::bit_position(old_index);
let value =
if self.dense_elements.unwrap()[word] & DenseBinaryMatrix::select_mask(bit) == 0 {
Octet::zero()
} else {
Octet::one()
};
let value = if self.dense_elements.unwrap()[self.dense_word_index]
& DenseBinaryMatrix::select_mask(self.dense_bit_index)
== 0
{
Octet::zero()
} else {
Octet::one()
};
self.dense_bit_index += 1;
if self.dense_bit_index == 64 {
self.dense_bit_index = 0;
self.dense_word_index += 1;
}
return Some((old_index, value));
}
}
Expand Down
Loading

0 comments on commit 102c6a5

Please sign in to comment.