From 10e83bdd213bbba07ae7c10a41c6bdf2dfd6ab09 Mon Sep 17 00:00:00 2001 From: gengbing Date: Sun, 29 May 2022 19:30:51 +0800 Subject: [PATCH] add v2, a new version built with generics --- README.md | 12 +- doublejump.go | 2 +- v2/benchmark/benchmark_test.go | 30 +++ v2/doublejump.go | 183 +++++++++++++++ v2/doublejump_test.go | 415 +++++++++++++++++++++++++++++++++ v2/go.mod | 5 + v2/go.sum | 2 + 7 files changed, 647 insertions(+), 2 deletions(-) create mode 100644 v2/benchmark/benchmark_test.go create mode 100644 v2/doublejump.go create mode 100644 v2/doublejump_test.go create mode 100644 v2/go.mod create mode 100644 v2/go.sum diff --git a/README.md b/README.md index a510440..f041531 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,18 @@ BenchmarkSerialxHashring/100-nodes 2535745 482.1 ns/ BenchmarkSerialxHashring/1000-nodes 2243271 549.6 ns/op ``` -# Example +# Import + +```go +// If golang version <= 1.17 +import "github.com/edwingeng/doublejump" + +// If golang version >= 1.18 +import "github.com/edwingeng/doublejump/v2" ``` + +# Example +```go h := NewHash() for i := 0; i < 10; i++ { h.Add(fmt.Sprintf("node%d", i)) diff --git a/doublejump.go b/doublejump.go index 0a70eff..a75b3b3 100644 --- a/doublejump.go +++ b/doublejump.go @@ -156,7 +156,7 @@ func (this *Hash) Shrink() { this.compact.shrink(this.loose.a) } -// Get returns an object according to the key provided. +// Get returns an object according to the key provided, or nil if there is no object in the hash. 
func (this *Hash) Get(key uint64) interface{} { obj := this.loose.get(key) switch obj { diff --git a/v2/benchmark/benchmark_test.go b/v2/benchmark/benchmark_test.go new file mode 100644 index 0000000..d7a5896 --- /dev/null +++ b/v2/benchmark/benchmark_test.go @@ -0,0 +1,30 @@ +package benchmark + +import ( + "fmt" + "testing" + + "github.com/edwingeng/doublejump/v2" +) + +var ( + g struct { + Ret string + } +) + +func BenchmarkDoublejump(b *testing.B) { + for i := 10; i <= 1000; i *= 10 { + b.Run(fmt.Sprintf("%d-nodes", i), func(b *testing.B) { + h := doublejump.NewHash[string]() + for j := 0; j < i; j++ { + h.Add(fmt.Sprintf("node%d", j)) + } + + b.ResetTimer() + for j := 0; j < b.N; j++ { + g.Ret, _ = h.Get(uint64(j)) + } + }) + } +} diff --git a/v2/doublejump.go b/v2/doublejump.go new file mode 100644 index 0000000..028529e --- /dev/null +++ b/v2/doublejump.go @@ -0,0 +1,183 @@ +// Package doublejump provides a revamped Google's jump consistent hash. +package doublejump + +import ( + "math/rand" + + "github.com/dgryski/go-jump" +) + +type optional[T comparable] struct { + b bool + v T +} + +type looseHolder[T comparable] struct { + a []optional[T] + m map[T]int + f []int +} + +func (holder *looseHolder[T]) add(obj T) { + if _, ok := holder.m[obj]; ok { + return + } + + if n := len(holder.f); n == 0 { + holder.a = append(holder.a, optional[T]{v: obj, b: true}) + holder.m[obj] = len(holder.a) - 1 + } else { + idx := holder.f[n-1] + holder.f = holder.f[:n-1] + holder.a[idx] = optional[T]{v: obj, b: true} + holder.m[obj] = idx + } +} + +func (holder *looseHolder[T]) remove(obj T) { + if idx, ok := holder.m[obj]; ok { + holder.a[idx] = optional[T]{} + holder.f = append(holder.f, idx) + delete(holder.m, obj) + } +} + +func (holder *looseHolder[T]) get(key uint64) (T, bool) { + var defVal T + n := len(holder.a) + if n == 0 { + return defVal, false + } + + h := jump.Hash(key, n) + if holder.a[h].b { + return holder.a[h].v, true + } else { + return defVal, false + } +} + 
+func (holder *looseHolder[T]) shrink() { + if len(holder.f) == 0 { + return + } + + var a []optional[T] + for _, opt := range holder.a { + if opt.b { + a = append(a, opt) + holder.m[opt.v] = len(a) - 1 + } + } + holder.a = a + holder.f = nil +} + +type compactHolder[T comparable] struct { + a []T + m map[T]int +} + +func (holder *compactHolder[T]) add(obj T) { + if _, ok := holder.m[obj]; ok { + return + } + + holder.a = append(holder.a, obj) + holder.m[obj] = len(holder.a) - 1 +} + +func (holder *compactHolder[T]) remove(obj T) { + if idx, ok := holder.m[obj]; ok { + newLen := len(holder.a) - 1 + tail := holder.a[newLen] + holder.a[idx] = tail + holder.m[tail] = idx + var defVal T + holder.a[newLen] = defVal + holder.a = holder.a[:newLen] + delete(holder.m, obj) + } +} + +func (holder *compactHolder[T]) get(key uint64) (T, bool) { + var defVal T + n := len(holder.a) + if n == 0 { + return defVal, false + } + + h := jump.Hash(key*0xc6a4a7935bd1e995, n) + return holder.a[h], true +} + +// Hash is a revamped version of Google's jump consistent hash. It overcomes the shortcoming of the +// original implementation - not being able to remove nodes. +type Hash[T comparable] struct { + loose looseHolder[T] + compact compactHolder[T] +} + +// NewHash creates a new doublejump hash instance, which is NOT thread-safe. +func NewHash[T comparable]() *Hash[T] { + hash := &Hash[T]{} + hash.loose.m = make(map[T]int) + hash.compact.m = make(map[T]int) + return hash +} + +// Add adds an object to the hash. +func (h *Hash[T]) Add(obj T) { + h.loose.add(obj) + h.compact.add(obj) +} + +// Remove removes an object from the hash. +func (h *Hash[T]) Remove(obj T) { + h.loose.remove(obj) + h.compact.remove(obj) +} + +// Len returns the number of objects in the hash. +func (h *Hash[T]) Len() int { + return len(h.compact.a) +} + +// LooseLen returns the size of the inner loose object holder.
+func (h *Hash[T]) LooseLen() int { + return len(h.loose.a) +} + +// Shrink removes all empty slots from the hash. +func (h *Hash[T]) Shrink() { + h.loose.shrink() +} + +// Get returns an object and a boolean value according to the key provided. +// If there is no object in the hash, ok is false. +func (h *Hash[T]) Get(key uint64) (obj T, ok bool) { + if obj, ok = h.loose.get(key); ok { + return obj, true + } + return h.compact.get(key) +} + +// All returns all the objects in this Hash. +func (h *Hash[T]) All() []T { + n := len(h.compact.a) + if n == 0 { + return nil + } + all := make([]T, n) + copy(all, h.compact.a) + return all +} + +// Random returns a random object. +func (h *Hash[T]) Random() (T, bool) { + n := len(h.compact.a) + if n > 0 { + return h.compact.a[rand.Intn(n)], true + } + return *new(T), false +} diff --git a/v2/doublejump_test.go b/v2/doublejump_test.go new file mode 100644 index 0000000..f127164 --- /dev/null +++ b/v2/doublejump_test.go @@ -0,0 +1,415 @@ +package doublejump + +import ( + "errors" + "flag" + "fmt" + "math" + "math/rand" + "runtime" + "sync" + "testing" + "time" +) + +var debugMode = flag.Bool("debugMode", false, "enable the debug mode") + +func init() { + rand.Seed(time.Now().UnixMilli()) +} + +func invariant[T comparable](h *Hash[T], t *testing.T) { + t.Helper() + if len(h.loose.a) != len(h.loose.m)+len(h.loose.f) { + t.Fatalf("len(h.loose.a) != len(h.loose.m) + len(h.loose.f). len(a): %d, len(m): %d, len(f): %d", + len(h.loose.a), len(h.loose.m), len(h.loose.f)) + } + if len(h.compact.a) != len(h.compact.m) { + t.Fatalf("len(h.compact.a) != len(h.compact.m). len(a): %d, len(m): %d", + len(h.compact.a), len(h.compact.m)) + } + + for obj, idx := range h.loose.m { + if opt := h.loose.a[idx]; !opt.b || opt.v != obj { + t.Fatalf(`!opt.b || opt.v != obj. 
obj: %v, idx: %v, opt.b: %v, opt.v: %v`, obj, idx, opt.b, opt.v) + } + } + + var defVal T + freeMap := make(map[int]struct{}) + m1 := make(map[int]struct{}) + for _, idx := range h.loose.f { + freeMap[idx] = struct{}{} + m1[idx] = struct{}{} + if h.loose.a[idx].v != defVal { + t.Fatalf(`h.loose.a[idx].v != defVal. idx: %d, a[idx]: %v`, idx, h.loose.a[idx]) + } + } + if len(freeMap) != len(h.loose.f) { + t.Fatalf("len(freeMap) != len(h.loose.f). %d vs %d", + len(freeMap), len(h.loose.f)) + } + + slots := make([]bool, len(h.loose.a)) + usedMap := make(map[int]struct{}) + for _, idx := range h.loose.m { + slots[idx] = true + usedMap[idx] = struct{}{} + m1[idx] = struct{}{} + if _, ok := freeMap[idx]; ok { + t.Fatalf("%d should not be in the free list", idx) + } + } + for i := range slots { + if h.loose.a[i].b != slots[i] { + t.Fatalf("h.loose.a[i].b != slots[i]. i: %d, b[i]: %v, slots[i]: %v", + i, h.loose.a[i].b, slots[i]) + } + } + if len(usedMap) != len(h.loose.m) { + t.Fatalf("len(usedMap) != len(h.loose.m). %d vs %d", + len(usedMap), len(h.loose.m)) + } + if len(m1) != len(h.loose.a) { + t.Fatalf("len(m1) != len(h.loose.a). %d vs %d", + len(m1), len(h.loose.a)) + } + + m2 := make(map[T]int) + for i, obj := range h.compact.a { + m2[obj] = i + } + if len(m2) != len(h.compact.m) { + t.Fatalf("len(m2) != len(h.compact.m). len(m2): %d, len(m): %d", len(m2), len(h.compact.m)) + } + for obj, idx := range h.compact.m { + if i, ok := m2[obj]; !ok { + t.Fatalf("cannot find %v in m2", obj) + } else if i != idx { + t.Fatalf("m2[%v] != h.compact.m[%v]. 
idx: %d, i: %d", obj, obj, idx, i) + } + } + + all := h.All() + if len(all) != h.Len() { + t.Fatal("len(all) != h.Len()") + } +} + +func TestHash_Basic(t *testing.T) { + h := NewHash[int]() + invariant(h, t) + + const n1 = 10 + for i := 0; i < 100*n1; i += 100 { + h.Add(i) + invariant(h, t) + + for j := 0; j <= i; j++ { + if _, ok := h.Get(uint64(j)); !ok { + t.Fatal("something is wrong with Get") + } + } + for j := 0; j < 10000; j++ { + if _, ok := h.Get(rand.Uint64()); !ok { + t.Fatal("something is wrong with Get") + } + } + } + + h.Remove(0) + invariant(h, t) + + h.Remove(100) + invariant(h, t) + + h.Remove(900) + invariant(h, t) + + h.Remove(500) + invariant(h, t) + + h.Shrink() + invariant(h, t) + h.Shrink() + invariant(h, t) + + for i := 0; i < 100*n1; i += 100 { + h.Remove(i) + invariant(h, t) + } +} + +func TestHash_Add(t *testing.T) { + h := NewHash[int]() + h.Add(100) + h.Add(200) + h.Add(300) + h.Add(100) + invariant(h, t) + + if h.Len() != 3 { + t.Fatalf("h.Len() != 3") + } + + h.Remove(200) + if h.Len() != 2 { + t.Fatalf("h.Len() != 2") + } + + h.Add(500) + invariant(h, t) + if len(h.loose.a) != 3 || h.loose.a[0].v != 100 || h.loose.a[1].v != 500 || h.loose.a[2].v != 300 { + t.Fatalf("h.loose.a is wrong. a: %v", h.loose.a) + } +} + +func TestHash_Get(t *testing.T) { + h := NewHash[int]() + if v, ok := h.Get(100); ok || v != 0 { + t.Fatal("something is wrong with Get") + } + + for i := 0; i < 10; i++ { + h.Add(i) + } + for i := 9; i >= 0; i-- { + h.Remove(i) + } + if v, ok := h.Get(100); ok || v != 0 { + t.Fatal("something is wrong with Get") + } +} + +func TestHash_LooseLen(t *testing.T) { + h := NewHash[int]() + for i := 0; i < 10; i++ { + h.Add(i) + } + if h.LooseLen() != 10 { + t.Fatal("h.LooseLen() != 10") + } + + n := 10 + for i := 1; i < 10; i += 2 { + h.Remove(i) + n-- + if h.Len() != n { + t.Fatalf("h.Len() != n. 
h.Len(): %d, n: %d", h.Len(), n) + } + if h.LooseLen() != 10 { + t.Fatal("h.LooseLen() should not change after calling Remove") + } + } +} + +func checkBalance(total int, h *Hash[int]) (float64, error) { + if h.Len() == 0 { + return 0, nil + } + if total < h.Len()*10000 { + return 0, errors.New("total is too small") + } + + a := make([]int, h.LooseLen()) + for i := 0; i < total; i++ { + if v, ok := h.Get(uint64(i)); ok { + a[v]++ + } else { + panic("impossible") + } + } + var nn int + for _, c := range a { + if c > 0 { + nn++ + } + } + if nn != h.Len() { + return 0, fmt.Errorf("nn != h.Len(). nn: %d, h.Len(): %d", nn, h.Len()) + } + + maxErr := float64(0) + avg := float64(total) / float64(h.Len()) + for obj, c := range a { + if c == 0 { + continue + } + e := math.Abs(float64(c)/avg - 1) + maxErr = math.Max(maxErr, e) + if e > 0.15 { + return 0, fmt.Errorf("not balanced. len: %d, len(f): %d, avg: %.1f, e: %.2f, obj: %c, c: %d", + h.Len(), len(h.loose.f), avg, e, obj, c) + } + } + + return maxErr, nil +} + +func TestHash_Balance(t *testing.T) { + sm := make(chan int, runtime.NumCPU()/2+1) + for i := 0; i < cap(sm); i++ { + sm <- 1 + } + + const n1 = 1000 + chErr := make(chan error, n1) + var wg sync.WaitGroup + for rm := 0; true; rm += rand.Intn(100) + 1 { + rm := rm + if rm > n1 { + rm = n1 + } + + <-sm + wg.Add(1) + go func() { + defer func() { + wg.Done() + sm <- 1 + }() + + h := NewHash[int]() + var a [n1]int + for i := 0; i < n1; i++ { + h.Add(i) + a[i] = i + } + rand.Shuffle(n1, func(i, j int) { + a[i], a[j] = a[j], a[i] + }) + for i := 0; i < rm; i++ { + h.Remove(a[i]) + invariant(h, t) + } + if h.Len() != n1-rm { + chErr <- fmt.Errorf("h.Len() != n1-rm. h.Len(): %d, n1: %d, rm: %d", h.Len(), n1, rm) + return + } + + if rm != n1 { + for j := 0; j < 10000; j++ { + if _, ok := h.Get(rand.Uint64()); !ok { + chErr <- fmt.Errorf("something is wrong with Get [1]. 
len: %d, looseLen: %d", + h.Len(), h.LooseLen()) + return + } + } + } else { + if _, ok := h.Get(rand.Uint64()); ok { + chErr <- fmt.Errorf("something is wrong with Get [2]. len: %d, looseLen: %d", + h.Len(), h.LooseLen()) + return + } + } + + if *debugMode { + total := h.Len() * 10000 + maxErr, err := checkBalance(total, h) + if err != nil { + chErr <- err + return + } + fmt.Printf("rm: %-6d len: %-6d total: %-12d maxErr: %.2f\n", + rm, h.Len(), total, maxErr) + } + }() + + if rm == n1 { + break + } + } + + wg.Wait() + select { + case err := <-chErr: + t.Fatal(err) + default: + } +} + +func TestHash_Consistent(t *testing.T) { + const n1 = 100 + numbers := []int{0, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47} + var m0 map[int]int + for _, rm := range numbers { + h1 := NewHash[int]() + var xx []int + for i := 0; i < n1; i++ { + h1.Add(i) + xx = append(xx, i) + } + rand.Shuffle(n1, func(i, j int) { + xx[i], xx[j] = xx[j], xx[i] + }) + for i := 0; i < rm; i++ { + h1.Remove(xx[i]) + invariant(h1, t) + } + if h1.Len() != n1-rm { + t.Fatalf("h1.Len() != n1-rm. 
h1.Len(): %d, n1: %d, rm: %d", h1.Len(), n1, rm) + } + + total := n1 * 10000 + m1 := make(map[int]int, total) + for i := 0; i < total; i++ { + if obj, ok := h1.Get(uint64(i)); ok { + m1[i] = obj + } else { + panic("impossible") + } + } + + switch rm { + case 0: + m0 = m1 + default: + var n2 int + for k, v := range m1 { + if m0[k] == v { + n2++ + } + } + r1 := float64(total-n2) / float64(total) + r2 := float64(rm) / n1 + delta := math.Abs(r1 - r2) + if delta > 0.05 { + t.Fatal("delta > 0.05") + } + } + } +} + +func Example() { + h := NewHash[string]() + for i := 0; i < 10; i++ { + h.Add(fmt.Sprintf("node%d", i)) + } + + fmt.Println(h.Len()) + fmt.Println(h.LooseLen()) + + fmt.Println(h.Get(1000)) + fmt.Println(h.Get(2000)) + fmt.Println(h.Get(3000)) + + h.Remove("node3") + fmt.Println(h.Len()) + fmt.Println(h.LooseLen()) + + fmt.Println(h.Get(1000)) + fmt.Println(h.Get(2000)) + fmt.Println(h.Get(3000)) + + // Output: + // 10 + // 10 + // node9 true + // node2 true + // node3 true + // 9 + // 10 + // node9 true + // node2 true + // node0 true +} diff --git a/v2/go.mod b/v2/go.mod new file mode 100644 index 0000000..36c16ad --- /dev/null +++ b/v2/go.mod @@ -0,0 +1,5 @@ +module github.com/edwingeng/doublejump/v2 + +go 1.18 + +require github.com/dgryski/go-jump v0.0.0-20211018200510-ba001c3ffce0 diff --git a/v2/go.sum b/v2/go.sum new file mode 100644 index 0000000..920ff5f --- /dev/null +++ b/v2/go.sum @@ -0,0 +1,2 @@ +github.com/dgryski/go-jump v0.0.0-20211018200510-ba001c3ffce0 h1:0wH6nO9QEa02Qx8sIQGw6ieKdz+BXjpccSOo9vXNl4U= +github.com/dgryski/go-jump v0.0.0-20211018200510-ba001c3ffce0/go.mod h1:4hKCXuwrJoYvHZxJ86+bRVTOMyJ0Ej+RqfSm8mHi6KA=