-
Notifications
You must be signed in to change notification settings - Fork 4
/
interpolative_coding.hpp
157 lines (131 loc) · 3.85 KB
/
interpolative_coding.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#pragma once
#include <vector>
#include <stdint.h>
#include "succinct/broadword.hpp"
namespace ds2i {
class bit_writer {
public:
bit_writer(std::vector<uint32_t>& buf)
: m_buf(buf)
, m_size(0)
, m_cur_word(nullptr)
{
m_buf.clear();
}
void write(uint32_t bits, uint32_t len)
{
if (!len) return;
uint32_t pos_in_word = m_size % 32;
m_size += len;
if (pos_in_word == 0) {
m_buf.push_back(bits);
} else {
*m_cur_word |= bits << pos_in_word;
if (len > 32 - pos_in_word) {
m_buf.push_back(bits >> (32 - pos_in_word));
}
}
m_cur_word = &m_buf.back();
}
size_t size() const {
return m_size;
}
void write_int(uint32_t val, uint32_t u)
{
assert(u > 0);
assert(val < u);
auto b = succinct::broadword::msb(u);
uint64_t m = (uint64_t(1) << (b + 1)) - u;
if (val < m) {
write(val, b);
} else {
val += m;
// since we use little-endian we must split the writes
write(val >> 1, b);
write(val & 1, 1);
}
}
void write_interpolative(uint32_t const* in,
size_t n,
uint32_t low,
uint32_t high)
{
if (!n) return;
assert(low <= high);
size_t h = n / 2;
uint32_t val = in[h];
write_int(val - low, high - low + 1);
write_interpolative(in, h, low, val);
write_interpolative(in + h + 1, n - h - 1, val, high);
}
private:
std::vector<uint32_t>& m_buf;
size_t m_size;
uint32_t* m_cur_word;
};
class bit_reader {
public:
bit_reader(uint32_t const* in)
: m_in(in)
, m_avail(0)
, m_buf(0)
, m_pos(0)
{}
size_t position() const
{
return m_pos;
}
uint32_t read(uint32_t len)
{
if (!len) return 0;
if (m_avail < len) {
m_buf |= uint64_t(*m_in++) << m_avail;
m_avail += 32;
}
uint32_t val = m_buf & ((uint64_t(1) << len) - 1);
m_buf >>= len;
m_avail -= len;
m_pos += len;
return val;
}
uint32_t read_int(uint32_t u)
{
assert(u > 0);
auto b = succinct::broadword::msb(u);
uint64_t m = (uint64_t(1) << (b + 1)) - u;
uint32_t val = read(b);
if (val >= m) {
val = (val << 1) + read(1) - m;
}
assert(val < u);
return val;
}
void read_interpolative(uint32_t* out,
size_t n,
uint32_t low,
uint32_t high)
{
assert(low <= high);
assert(n > 0);
size_t h = n / 2;
uint32_t val = low + read_int(high - low + 1);
out[h] = val;
if (n == 1) {
// optimization to avoid two unpredictable ifs
return;
}
// the two ifs are a bit ugly but it is faster than postponing them
if (h) {
read_interpolative(out, h, low, val);
}
if (n - h - 1) {
read_interpolative(out + h + 1, n - h - 1, val, high);
}
}
private:
uint32_t const* m_in;
uint32_t m_avail;
uint64_t m_buf;
size_t m_pos;
};
}