-
Notifications
You must be signed in to change notification settings - Fork 2
/
strata_estimator.go
89 lines (72 loc) · 1.96 KB
/
strata_estimator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package difference_digest
import (
"database/sql"
"fmt"
"math"
)
// Sizing parameters shared by every StrataEstimator built in this file.
const (
// stratumCount is the number of strata (one InvertibleBloomFilter each)
// allocated by NewStrataEstimator.
stratumCount = 64
// cellsCount is the size handed to NewIBF for each stratum; it is also
// formatted into the "strata_estimator" SQL query by EncodeEstimatorDB.
cellsCount = 80
)
// StrataEstimator is a data structure used to estimate the number of
// differences between two sets probabilistically.
type StrataEstimator struct {
// Stratum holds one InvertibleBloomFilter per stratum. Add picks the stratum
// for an element with estimatorHash, and EstimateDifference compares the
// strata of two estimators index by index.
Stratum []InvertibleBloomFilter
}
// NewStrataEstimator initializes a new StrataEstimator. The result holds
// stratumCount strata, each a copy of a fresh IBF created by
// NewIBF(cellsCount).
func NewStrataEstimator() *StrataEstimator {
	strata := make([]InvertibleBloomFilter, 0, stratumCount)
	for n := 0; n < stratumCount; n++ {
		strata = append(strata, *NewIBF(cellsCount))
	}
	return &StrataEstimator{Stratum: strata}
}
// Add adds an element to the StrataEstimator by inserting it into the single
// stratum whose index is estimatorHash(element).
func (se *StrataEstimator) Add(element uint64) {
	stratum := &se.Stratum[estimatorHash(element)]
	stratum.Add(element)
}
// EstimateDifference returns the estimated number of differences between the
// receiver and a second StrataEstimator.
//
// Strata are compared from the highest index down to 0. For each index i the
// receiver's stratum has se2's stratum subtracted from it and the result is
// decoded; on success the number of elements in the first decoded set is added
// to a running count. At the first stratum that fails to decode, the function
// returns (count + 1) * 2^(i+1), saturating at math.MaxUint64 instead of
// overflowing. If every stratum decodes, the running count is returned as is.
//
// se2 must be non-nil and hold at least as many strata as the receiver.
func (se *StrataEstimator) EstimateDifference(se2 *StrataEstimator) uint64 {
	var count uint64
	// Walk the strata actually present rather than assuming a fixed number.
	for i := len(se.Stratum) - 1; i >= 0; i-- {
		diff := se.Stratum[i].Subtract(&se2.Stratum[i])
		// Only the first decoded set contributes to the count; the second is
		// intentionally unused here.
		aWb, _, ok := diff.Decode()
		if !ok {
			return saturatingShiftLeft(count+1, uint(i+1))
		}
		count += uint64(len(aWb))
	}
	return count
}

// saturatingShiftLeft returns base * 2^shift, clamped to math.MaxUint64 when
// the exact product does not fit in a uint64.
func saturatingShiftLeft(base uint64, shift uint) uint64 {
	if base == 0 {
		return 0
	}
	if shift >= 64 || base > uint64(math.MaxUint64)>>shift {
		return math.MaxUint64
	}
	return base << shift
}
// EncodeEstimatorDB queries a PostgreSQL database and returns a
// StrataEstimator for the specified table and column.
//
// The statement is the "strata_estimator" query template formatted with table,
// column and cellsCount. Every result row is scanned as (stratum index, cell
// index, ID sum, hash sum, count) and stored as one IBFCell of a fresh
// StrataEstimator.
//
// table and column are interpolated into the SQL text verbatim, so they must
// come from a trusted source.
//
// An error is returned when the query fails, a row cannot be scanned, a row
// names a stratum outside the estimator, or row iteration ends with an error.
// The returned estimator is nil whenever the error is non-nil.
func EncodeEstimatorDB(db *sql.DB, table string, column string) (*StrataEstimator, error) {
	rows, err := db.Query(fmt.Sprintf(query("strata_estimator"), table, column, cellsCount))
	if err != nil {
		return nil, fmt.Errorf("querying strata estimator for %s.%s: %w", table, column, err)
	}
	defer rows.Close()

	estimator := NewStrataEstimator()
	for rows.Next() {
		var (
			strata, cell   int
			idSum, hashSum uint64
			count          int64
		)
		if err := rows.Scan(&strata, &cell, &idSum, &hashSum, &count); err != nil {
			return nil, fmt.Errorf("scanning strata estimator row: %w", err)
		}
		// Reject a stratum index the estimator does not have instead of
		// panicking on database-provided data.
		if strata < 0 || strata >= len(estimator.Stratum) {
			return nil, fmt.Errorf("stratum index %d out of range [0, %d)", strata, len(estimator.Stratum))
		}
		idBitmap := ToBitmap(idSum)
		hashBitmap := ToBitmap(hashSum)
		// NOTE(review): cell is used as-is; presumably the query keeps it
		// within the cell range of an IBF built by NewIBF(cellsCount) — confirm.
		estimator.Stratum[strata].Cells[cell] = IBFCell{
			IDSum:   *idBitmap,
			HashSum: *hashBitmap,
			Count:   count,
		}
	}
	// rows.Next also returns false when iteration fails part-way; without this
	// check a partially filled estimator would be reported as success.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading strata estimator rows: %w", err)
	}
	return estimator, nil
}