-
Notifications
You must be signed in to change notification settings - Fork 0
/
encoding.py
56 lines (40 loc) · 1.79 KB
/
encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from functools import reduce
from copy import deepcopy
def bitstring_to_bytes(bit_s):
    """Pack a string of binary digits into big-endian bytes.

    The whole string is parsed as a single base-2 integer and written out
    using ``ceil(len(bit_s) / 8)`` bytes. When the length is not a multiple
    of 8, the missing bits therefore appear as zero bits at the *front* of
    the first byte.

    Args:
        bit_s: String made of ``'0'`` and ``'1'`` characters.

    Returns:
        The packed ``bytes``; ``b""`` for an empty string.

    Raises:
        ValueError: If ``bit_s`` is not a valid base-2 integer literal.
    """
    # int("", 2) raises ValueError, so an empty bit string is handled here.
    if not bit_s:
        return b""
    byte_count = (len(bit_s) + 7) // 8  # round up to whole bytes
    return int(bit_s, 2).to_bytes(byte_count, byteorder='big')
def bytes_to_bit_string(bytes):
    """Render a bytes-like object as a string of binary digits.

    The bytes are read as one big-endian unsigned integer and formatted in
    base 2. Leading zero bits are NOT preserved: ``b"\\x00\\x05"`` yields
    ``"101"``, and an all-zero input yields ``"0"``.

    Args:
        bytes: Bytes-like object to convert. The parameter name shadows the
            builtin; it is kept so existing keyword callers keep working.

    Returns:
        The binary digits as ``str``; ``""`` for empty input.
    """
    # Empty input carries no bits at all, so return an empty bit string.
    if not bytes:
        return ""
    return format(int.from_bytes(bytes, "big"), "b")
def gap_encoding(list_to_encode):
    """Replace every element after the first by its difference to the previous one.

    The first element is kept as-is; element ``i`` (``i >= 1``) becomes
    ``list_to_encode[i] - list_to_encode[i - 1]``. The input is not modified.

    Args:
        list_to_encode: Sequence of numbers (list, tuple, ...).

    Returns:
        A new list with the same length as the input; ``[]`` for empty input.
    """
    # A shallow copy is enough: elements are only read, never mutated.
    values = list(list_to_encode)
    return values[:1] + [cur - prev for prev, cur in zip(values, values[1:])]
def gap_decoding(list_to_decode):
    """Rebuild absolute values from a first value followed by gaps.

    The first element is kept as-is; every later element is added to the
    previously decoded value (a running sum). The input is not modified.

    Args:
        list_to_decode: Sequence whose first item is an absolute value and
            whose remaining items are differences.

    Returns:
        A new list with the same length as the input; ``[]`` for empty input.
    """
    decoded = []
    for gap in list_to_decode:
        # The first item has no predecessor and is copied unchanged.
        decoded.append(gap + decoded[-1] if decoded else gap)
    return decoded
def gamma_encoding(postings):
    """Gamma-encode a postings list into one string of '0'/'1' characters.

    The postings are first turned into gaps with ``get_gaps_list``. Each gap
    is then written as its length code (``get_length``) followed by its
    offset bits (``get_offset``), and all codes are concatenated.

    Args:
        postings: Sequence of integers whose gaps are all >= 1, i.e. the
            first value is positive and the values strictly increase.

    Returns:
        The concatenated codes as ``str``; ``""`` for empty postings.

    Raises:
        ValueError: If any gap is smaller than 1. Such a gap has no gamma
            code, and encoding it anyway would yield bits that decode to a
            different value.
    """
    # len() instead of truthiness so array-like inputs are accepted too.
    if len(postings) == 0:
        return ""
    codes = []
    for gap in get_gaps_list(postings):
        if gap < 1:
            raise ValueError(
                "gamma encoding requires gaps >= 1, got %r" % (gap,)
            )
        offset = get_offset(gap)  # computed once, used for length and payload
        codes.append(get_length(offset) + offset)
    return "".join(codes)
def gamma_decoding(gamma):
    """Decode a concatenation of gamma codes into a list of integers.

    Each code consists of ``k`` ones, a terminating ``"0"``, and ``k`` offset
    bits; the decoded value is the binary number ``"1" + offset``. A lone
    ``"0"`` (``k == 0``) therefore decodes to 1.

    The returned values are the encoded numbers themselves. For a string
    produced from a postings list these are the gaps, not the original
    postings.

    Args:
        gamma: String of ``'0'``/``'1'`` characters.

    Returns:
        List of decoded integers; ``[]`` for an empty string.

    Raises:
        ValueError: If the string is malformed: a length prefix without a
            terminating ``"0"``, a prefix containing characters other than
            ``"1"``, fewer offset bits than announced, or non-binary offset
            characters.
    """
    decoded = []
    pos = 0
    end = len(gamma)
    # Walk the string with a cursor instead of re-slicing the remainder on
    # every code, which would copy the tail each time (quadratic overall).
    while pos < end:
        zero_at = gamma.find("0", pos)
        if zero_at == -1:
            raise ValueError(
                "malformed gamma code: length prefix is not terminated by '0'"
            )
        width = zero_at - pos  # number of offset bits announced by the prefix
        if gamma.count("1", pos, zero_at) != width:
            raise ValueError(
                "malformed gamma code: unexpected character in length prefix"
            )
        offset_end = zero_at + 1 + width
        if offset_end > end:
            raise ValueError("malformed gamma code: offset bits are truncated")
        # Re-attach the implicit leading 1 that the encoder dropped.
        decoded.append(int("1" + gamma[zero_at + 1:offset_end], 2))
        pos = offset_end
    return decoded
def get_offset(gap):
    """Return the binary digits of ``gap`` without the leading digit.

    ``bin(gap)`` looks like ``"0b1010"``; dropping the first three characters
    removes the ``"0b"`` prefix together with the most significant bit.
    """
    binary_literal = bin(gap)
    return binary_literal[3:]
def get_length(offset):
    """Return the length code for ``offset``.

    The code is the unary representation of ``len(offset)`` produced by
    ``unary_codification``, followed by a terminating ``"0"``.
    """
    unary_prefix = unary_codification(len(offset))
    return unary_prefix + "0"
def unary_codification(gap):
    """Return a string of ``gap`` consecutive ``"1"`` characters.

    Values of ``gap`` that are zero or negative yield the empty string.
    """
    # String repetition replaces joining a throwaway list of "1" strings.
    return "1" * gap
def unary_decodification(gap):
    """Return the sum of the digits in ``gap``.

    For a string consisting only of ``"1"`` characters this is the number of
    ones, i.e. the value of the unary code.

    Args:
        gap: String of digit characters.

    Returns:
        The digit sum as ``int``; 0 for an empty string.

    Raises:
        ValueError: If a character cannot be converted with ``int``.
    """
    # sum() has a natural result for empty input, unlike reduce() without an
    # initial value, which raises TypeError on an empty sequence.
    return sum(int(digit) for digit in gap)
def get_gaps_list(posting_lists):
    """Return the first posting followed by the gaps between neighbours.

    Args:
        posting_lists: Sequence of numbers supporting ``len``, indexing and
            slicing.

    Returns:
        A new list ``[p[0], p[1] - p[0], p[2] - p[1], ...]``; ``[]`` when the
        input is empty.
    """
    # Guard the empty case: indexing element 0 would raise IndexError.
    if len(posting_lists) == 0:
        return []
    gaps = [posting_lists[0]]
    gaps.extend(
        current - previous
        for previous, current in zip(posting_lists, posting_lists[1:])
    )
    return gaps
if __name__ == '__main__':
    # Small demo: encode a sample postings list, show the type of the
    # encoded value, then decode it again.
    sample_postings = [10, 15, 22, 23, 34, 44, 50, 58]
    encoded = gamma_encoding(sample_postings)
    print(encoded)
    print(type(encoded))
    print(gamma_decoding(encoded))