From 2484db05d4f727957c57d79a6b68253122da2edf Mon Sep 17 00:00:00 2001 From: yihuang Date: Sat, 30 Mar 2024 00:32:58 +0800 Subject: [PATCH] Problem: nested cache store not efficient (#241) * Problem: nested cache store not efficient Solution: - introduce copy-on-write btree based cache store temp * changelog * rename * Update store/cachekv/store.go Signed-off-by: yihuang --------- Signed-off-by: yihuang --- store/CHANGELOG.md | 1 + store/cachekv/search_benchmark_test.go | 44 --- store/cachekv/search_test.go | 141 ---------- store/cachekv/store.go | 368 ++++--------------------- store/internal/btree/btree.go | 25 +- store/internal/btree/btree_test.go | 23 +- store/internal/btreeadaptor.go | 8 +- store/types/store.go | 6 + 8 files changed, 100 insertions(+), 516 deletions(-) delete mode 100644 store/cachekv/search_benchmark_test.go delete mode 100644 store/cachekv/search_test.go diff --git a/store/CHANGELOG.md b/store/CHANGELOG.md index 13c55db25952..e4ea8fd42e84 100644 --- a/store/CHANGELOG.md +++ b/store/CHANGELOG.md @@ -30,6 +30,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ * [#207](https://github.com/crypto-org-chain/cosmos-sdk/pull/207) Remove api CacheWrapWithTrace. * [#205](https://github.com/crypto-org-chain/cosmos-sdk/pull/205) Support object store. * [#240](https://github.com/crypto-org-chain/cosmos-sdk/pull/240) Split methods from `MultiStore` into specialized `RootMultiStore`, keep `MultiStore` generic. +* [#241](https://github.com/crypto-org-chain/cosmos-sdk/pull/241) Refactor the cache store to be btree backed, prepare to support copy-on-write atomic branching. ## v1.1.0 (March 20, 2024) diff --git a/store/cachekv/search_benchmark_test.go b/store/cachekv/search_benchmark_test.go deleted file mode 100644 index f0e29bc4ec18..000000000000 --- a/store/cachekv/search_benchmark_test.go +++ /dev/null @@ -1,44 +0,0 @@ -package cachekv - -import ( - "strconv" - "testing" - - "cosmossdk.io/store/internal/btree" -) - -func BenchmarkLargeUnsortedMisses(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - store := generateStore() - b.StartTimer() - - for k := 0; k < 10000; k++ { - // cache has A + Z values - // these are within range, but match nothing - store.dirtyItems([]byte("B1"), []byte("B2")) - } - } -} - -func generateStore() *Store { - cache := map[string]*cValue[[]byte]{} - unsorted := map[string]struct{}{} - for i := 0; i < 5000; i++ { - key := "A" + strconv.Itoa(i) - unsorted[key] = struct{}{} - cache[key] = &cValue[[]byte]{} - } - - for i := 0; i < 5000; i++ { - key := "Z" + strconv.Itoa(i) - unsorted[key] = struct{}{} - cache[key] = &cValue[[]byte]{} - } - - return &GStore[[]byte]{ - cache: cache, - unsortedCache: unsorted, - sortedCache: btree.NewBTree[[]byte](), - } -} diff --git a/store/cachekv/search_test.go b/store/cachekv/search_test.go deleted file mode 100644 index 41321c076eae..000000000000 --- a/store/cachekv/search_test.go +++ /dev/null @@ -1,141 +0,0 @@ -package cachekv - -import "testing" - -func TestFindStartIndex(t *testing.T) { - tests := []struct { - name string - sortedL []string - query string - want int - }{ - { - name: "non-existent value", - sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, - query: "o", - want: 8, - }, - { - name: "dupes start at index 0", - sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, - query: "a", - want: 0, - }, - { - name: "dupes start at non-index 0", - sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", 
"w", "x", "y", "z"}, - query: "c", - want: 1, - }, - { - name: "at end", - sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"}, - query: "z", - want: 7, - }, - { - name: "dupes at end", - sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"}, - query: "z", - want: 7, - }, - { - name: "entirely dupes", - sortedL: []string{"z", "z", "z", "z", "z"}, - query: "z", - want: 0, - }, - { - name: "non-existent but within >=start", - sortedL: []string{"z", "z", "z", "z", "z"}, - query: "p", - want: 0, - }, - { - name: "non-existent and out of range", - sortedL: []string{"d", "e", "f", "g", "h"}, - query: "z", - want: -1, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - body := tt.sortedL - got := findStartIndex(body, tt.query) - if got != tt.want { - t.Fatalf("Got: %d, want: %d", got, tt.want) - } - }) - } -} - -func TestFindEndIndex(t *testing.T) { - tests := []struct { - name string - sortedL []string - query string - want int - }{ - { - name: "non-existent value", - sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, - query: "o", - want: 7, - }, - { - name: "dupes start at index 0", - sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, - query: "a", - want: 0, - }, - { - name: "dupes start at non-index 0", - sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, - query: "c", - want: 1, - }, - { - name: "at end", - sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"}, - query: "z", - want: 7, - }, - { - name: "dupes at end", - sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"}, - query: "z", - want: 7, - }, - { - name: "entirely dupes", - sortedL: []string{"z", "z", "z", "z", "z"}, - query: "z", - want: 0, - }, - { - name: "non-existent and out of range", - sortedL: []string{"z", "z", "z", "z", "z"}, - query: "p", - want: -1, - }, - { - name: "non-existent and out of range", - sortedL: []string{"d", "e", "f", "g", "h"}, - query: "z", - want: 4, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - body := tt.sortedL - got := findEndIndex(body, tt.query) - if got != tt.want { - t.Fatalf("Got: %d, want: %d", got, tt.want) - } - }) - } -} diff --git a/store/cachekv/store.go b/store/cachekv/store.go index 7c7cf7513845..df47dfc3e6ae 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -1,34 +1,18 @@ package cachekv import ( - "bytes" - "sort" - "sync" - - dbm "github.com/cosmos/cosmos-db" - - "cosmossdk.io/math" "cosmossdk.io/store/cachekv/internal" "cosmossdk.io/store/internal/btree" - "cosmossdk.io/store/internal/conv" "cosmossdk.io/store/types" ) -// cValue represents a cached value. -// If dirty is true, it indicates the cached value is different from the underlying value. -type cValue[V any] struct { - value V - dirty bool -} - -type kvPair[V any] struct { - Key []byte - Value V -} - type Store = GStore[[]byte] -var _ types.CacheKVStore = (*Store)(nil) +var ( + _ types.CacheKVStore = (*Store)(nil) + _ types.CacheWrap = (*Store)(nil) + _ types.BranchStore = (*Store)(nil) +) func NewStore(parent types.KVStore) *Store { return NewGStore( @@ -40,11 +24,8 @@ func NewStore(parent types.KVStore) *Store { // GStore wraps an in-memory cache around an underlying types.KVStore. 
 type GStore[V any] struct {
- mtx sync.Mutex
- cache map[string]*cValue[V]
- unsortedCache map[string]struct{}
- sortedCache btree.BTree[V] // always ascending sorted
- parent types.GKVStore[V]
+ writeSet btree.BTree[V] // always ascending sorted
+ parent types.GKVStore[V]
 
 // isZero is a function that returns true if the value is considered "zero", for []byte and pointers the zero value
 // is `nil`, zero value is not allowed to set to a key, and it's returned if the key is not found.
@@ -57,12 +38,10 @@ type GStore[V any] struct {
 // NewStore creates a new Store object
 func NewGStore[V any](parent types.GKVStore[V], isZero func(V) bool, valueLen func(V) int) *GStore[V] {
 return &GStore[V]{
- cache: make(map[string]*cValue[V]),
- unsortedCache: make(map[string]struct{}),
- sortedCache: btree.NewBTree[V](),
- parent: parent,
- isZero: isZero,
- valueLen: valueLen,
+ writeSet: btree.NewBTree[V](),
+ parent: parent,
+ isZero: isZero,
+ valueLen: valueLen,
 }
 }
 
@@ -71,21 +50,35 @@ func (store *GStore[V]) GetStoreType() types.StoreType {
 return store.parent.GetStoreType()
 }
 
-// Get implements types.KVStore.
-func (store *GStore[V]) Get(key []byte) (value V) {
- store.mtx.Lock()
- defer store.mtx.Unlock()
+// Clone creates a copy-on-write snapshot of the cache store;
+// it only performs a shallow copy, so it is very fast.
+func (store *GStore[V]) Clone() types.BranchStore {
+ return &GStore[V]{
+ writeSet: store.writeSet.Copy(),
+ parent: store.parent,
+ }
+}
+
+// swapCache swaps out the internal cache store and leaves the current store unusable.
+func (store *GStore[V]) swapCache() btree.BTree[V] {
+ cache := store.writeSet
+ store.writeSet = btree.BTree[V]{}
+ return cache
+}
 
+// Restore restores the store cache to a given snapshot, leaving the snapshot unusable.
+func (store *GStore[V]) Restore(s types.BranchStore) {
+ store.writeSet = s.(*GStore[V]).swapCache()
+}
+
+// Get implements types.KVStore.
+func (store *GStore[V]) Get(key []byte) V {
 types.AssertValidKey(key)
- cacheValue, ok := store.cache[conv.UnsafeBytesToStr(key)]
- if !ok {
- value = store.parent.Get(key)
- store.setCacheValue(key, value, false)
- } else {
- value = cacheValue.value
+ value, found := store.writeSet.Get(key)
+ if !found {
+ return store.parent.Get(key)
 }
-
 return value
 }
 
@@ -94,93 +87,37 @@ func (store *GStore[V]) Set(key []byte, value V) {
 types.AssertValidKey(key)
 types.AssertValidValueGeneric(value, store.isZero, store.valueLen)
 
- store.mtx.Lock()
- defer store.mtx.Unlock()
- store.setCacheValue(key, value, true)
+ store.writeSet.Set(key, value)
 }
 
 // Has implements types.KVStore.
 func (store *GStore[V]) Has(key []byte) bool {
- value := store.Get(key)
+ types.AssertValidKey(key)
+
+ value, found := store.writeSet.Get(key)
+ if !found {
+ return store.parent.Has(key)
+ }
 return !store.isZero(value)
 }
 
 // Delete implements types.KVStore.
 func (store *GStore[V]) Delete(key []byte) {
 types.AssertValidKey(key)
-
- store.mtx.Lock()
- defer store.mtx.Unlock()
-
- store.setCacheValue(key, store.zeroValue, true)
-}
-
-func (store *GStore[V]) resetCaches() {
- if len(store.cache) > 100_000 {
- // Cache is too large. We likely did something linear time
- // (e.g. Epoch block, Genesis block, etc). Free the old caches from memory, and let them get re-allocated.
- // TODO: In a future CacheKV redesign, such linear workloads should get into a different cache instantiation.
- // 100_000 is arbitrarily chosen as it solved Osmosis' InitGenesis RAM problem.
- store.cache = make(map[string]*cValue[V]) - store.unsortedCache = make(map[string]struct{}) - } else { - // Clear the cache using the map clearing idiom - // and not allocating fresh objects. - // Please see https://bencher.orijtech.com/perfclinic/mapclearing/ - for key := range store.cache { - delete(store.cache, key) - } - for key := range store.unsortedCache { - delete(store.unsortedCache, key) - } - } - store.sortedCache = btree.NewBTree[V]() + store.writeSet.Set(key, store.zeroValue) } // Implements Cachetypes.KVStore. func (store *GStore[V]) Write() { - store.mtx.Lock() - defer store.mtx.Unlock() - - if len(store.cache) == 0 && len(store.unsortedCache) == 0 { - store.sortedCache = btree.NewBTree[V]() - return - } - - type cEntry struct { - key string - val *cValue[V] - } - - // We need a copy of all of the keys. - // Not the best. To reduce RAM pressure, we copy the values as well - // and clear out the old caches right after the copy. - sortedCache := make([]cEntry, 0, len(store.cache)) - - for key, dbValue := range store.cache { - if dbValue.dirty { - sortedCache = append(sortedCache, cEntry{key, dbValue}) - } - } - store.resetCaches() - sort.Slice(sortedCache, func(i, j int) bool { - return sortedCache[i].key < sortedCache[j].key - }) - - // TODO: Consider allowing usage of Batch, which would allow the write to - // at least happen atomically. - for _, obj := range sortedCache { - // We use []byte(key) instead of conv.UnsafeStrToBytes because we cannot - // be sure if the underlying store might do a save with the byteslice or - // not. Once we get confirmation that .Delete is guaranteed not to - // save the byteslice, then we can assume only a read-only copy is sufficient. - if !store.isZero(obj.val.value) { - // It already exists in the parent, hence update it. - store.parent.Set([]byte(obj.key), obj.val.value) + store.writeSet.Scan(func(key []byte, value V) bool { + if store.isZero(value) { + store.parent.Delete(key) } else { - store.parent.Delete([]byte(obj.key)) + store.parent.Set(key, value) } - } + return true + }) + store.writeSet.Clear() } // CacheWrap implements CacheWrapper. @@ -202,11 +139,7 @@ func (store *GStore[V]) ReverseIterator(start, end []byte) types.GIterator[V] { } func (store *GStore[V]) iterator(start, end []byte, ascending bool) types.GIterator[V] { - store.mtx.Lock() - defer store.mtx.Unlock() - - store.dirtyItems(start, end) - isoSortedCache := store.sortedCache.Copy() + isoSortedCache := store.writeSet.Copy() var ( err error @@ -226,200 +159,3 @@ func (store *GStore[V]) iterator(start, end []byte, ascending bool) types.GItera return internal.NewCacheMergeIterator(parent, cache, ascending, store.isZero) } - -func findStartIndex(strL []string, startQ string) int { - // Modified binary search to find the very first element in >=startQ. - if len(strL) == 0 { - return -1 - } - - var left, right, mid int - right = len(strL) - 1 - for left <= right { - mid = (left + right) >> 1 - midStr := strL[mid] - if midStr == startQ { - // Handle condition where there might be multiple values equal to startQ. - // We are looking for the very first value < midStL, that i+1 will be the first - // element >= midStr. 
- for i := mid - 1; i >= 0; i-- {
- if strL[i] != midStr {
- return i + 1
- }
- }
- return 0
- }
- if midStr < startQ {
- left = mid + 1
- } else { // midStrL > startQ
- right = mid - 1
- }
- }
- if left >= 0 && left < len(strL) && strL[left] >= startQ {
- return left
- }
- return -1
-}
-
-func findEndIndex(strL []string, endQ string) int {
- if len(strL) == 0 {
- return -1
- }
-
- // Modified binary search to find the very last element <= endQ.
- var left, right, mid int
- right = len(strL) - 1
- for left <= right {
- mid = (left + right) >> 1
- midStr := strL[mid]
- if midStr == endQ {
- // Handle condition where there might be multiple values equal to startQ.
- // We are looking for the very first value < midStL, that i+1 will be the first
- // element >= midStr.
- for i := mid - 1; i >= 0; i-- {
- if strL[i] < midStr {
- return i + 1
- }
- }
- return 0
- }
- if midStr < endQ {
- left = mid + 1
- } else { // midStrL > startQ
- right = mid - 1
- }
- }
-
- // Binary search failed, now let's find a value less than endQ.
- for i := right; i >= 0; i-- {
- if strL[i] < endQ {
- return i
- }
- }
-
- return -1
-}
-
-type sortState int
-
-const (
- stateUnsorted sortState = iota
- stateAlreadySorted
-)
-
-const minSortSize = 1024
-
-// Constructs a slice of dirty items, to use w/ memIterator.
-func (store *GStore[V]) dirtyItems(start, end []byte) {
- startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end)
- if end != nil && startStr > endStr {
- // Nothing to do here.
- return
- }
-
- n := len(store.unsortedCache)
- unsorted := make([]*kvPair[V], 0)
- // If the unsortedCache is too big, its costs too much to determine
- // whats in the subset we are concerned about.
- // If you are interleaving iterator calls with writes, this can easily become an
- // O(N^2) overhead.
- // Even without that, too many range checks eventually becomes more expensive
- // than just not having the cache.
- if n < minSortSize {
- for key := range store.unsortedCache {
- // dbm.IsKeyInDomain is nil safe and returns true iff key is greater than start
- if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
- cacheValue := store.cache[key]
- unsorted = append(unsorted, &kvPair[V]{Key: []byte(key), Value: cacheValue.value})
- }
- }
- store.clearUnsortedCacheSubset(unsorted, stateUnsorted)
- return
- }
-
- // Otherwise it is large so perform a modified binary search to find
- // the target ranges for the keys that we should be looking for.
- strL := make([]string, 0, n) - for key := range store.unsortedCache { - strL = append(strL, key) - } - sort.Strings(strL) - - // Now find the values within the domain - // [start, end) - startIndex := findStartIndex(strL, startStr) - if startIndex < 0 { - startIndex = 0 - } - - var endIndex int - if end == nil { - endIndex = len(strL) - 1 - } else { - endIndex = findEndIndex(strL, endStr) - } - if endIndex < 0 { - endIndex = len(strL) - 1 - } - - // Since we spent cycles to sort the values, we should process and remove a reasonable amount - // ensure start to end is at least minSortSize in size - // if below minSortSize, expand it to cover additional values - // this amortizes the cost of processing elements across multiple calls - if endIndex-startIndex < minSortSize { - endIndex = math.Min(startIndex+minSortSize, len(strL)-1) - if endIndex-startIndex < minSortSize { - startIndex = math.Max(endIndex-minSortSize, 0) - } - } - - kvL := make([]*kvPair[V], 0, 1+endIndex-startIndex) - for i := startIndex; i <= endIndex; i++ { - key := strL[i] - cacheValue := store.cache[key] - kvL = append(kvL, &kvPair[V]{Key: []byte(key), Value: cacheValue.value}) - } - - // kvL was already sorted so pass it in as is. - store.clearUnsortedCacheSubset(kvL, stateAlreadySorted) -} - -func (store *GStore[V]) clearUnsortedCacheSubset(unsorted []*kvPair[V], sortState sortState) { - n := len(store.unsortedCache) - if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map. - for key := range store.unsortedCache { - delete(store.unsortedCache, key) - } - } else { // Otherwise, normally delete the unsorted keys from the map. - for _, kv := range unsorted { - delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key)) - } - } - - if sortState == stateUnsorted { - sort.Slice(unsorted, func(i, j int) bool { - return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0 - }) - } - - for _, item := range unsorted { - // sortedCache is able to store `nil` value to represent deleted items. - store.sortedCache.Set(item.Key, item.Value) - } -} - -//---------------------------------------- -// etc - -// Only entrypoint to mutate store.cache. -// A `nil` value means a deletion. 
-func (store *GStore[V]) setCacheValue(key []byte, value V, dirty bool) {
- keyStr := conv.UnsafeBytesToStr(key)
- store.cache[keyStr] = &cValue[V]{
- value: value,
- dirty: dirty,
- }
- if dirty {
- store.unsortedCache[keyStr] = struct{}{}
- }
-}
diff --git a/store/internal/btree/btree.go b/store/internal/btree/btree.go
index 0fe28def41ee..bef4309412ff 100644
--- a/store/internal/btree/btree.go
+++ b/store/internal/btree/btree.go
@@ -36,22 +36,18 @@ func NewBTree[V any]() BTree[V] {
 }
 }
 
+// Set supports nil as a value when the btree is used as an overlay.
 func (bt BTree[V]) Set(key []byte, value V) {
 bt.tree.Set(newItem(key, value))
 }
 
-func (bt BTree[V]) Get(key []byte) V {
- var empty V
- i, found := bt.tree.Get(newItem(key, empty))
- if !found {
- return empty
- }
- return i.value
+func (bt BTree[V]) Get(key []byte) (V, bool) {
+ i, found := bt.tree.Get(newItemWithKey[V](key))
+ return i.value, found
 }
 
 func (bt BTree[V]) Delete(key []byte) {
- var empty V
- bt.tree.Delete(newItem(key, empty))
+ bt.tree.Delete(newItemWithKey[V](key))
 }
 
 func (bt BTree[V]) Iterator(start, end []byte) (types.GIterator[V], error) {
@@ -80,6 +76,12 @@ func (bt BTree[V]) Clear() {
 bt.tree.Clear()
 }
 
+func (bt BTree[V]) Scan(cb func(key []byte, value V) bool) {
+ bt.tree.Scan(func(i item[V]) bool {
+ return cb(i.key, i.value)
+ })
+}
+
 // item is a btree item with byte slices as keys and values
 type item[V any] struct {
 key []byte
@@ -95,3 +97,8 @@ func byKeys[V any](a, b item[V]) bool {
 func newItem[V any](key []byte, value V) item[V] {
 return item[V]{key: key, value: value}
 }
+
+// newItemWithKey creates a new pair item with an empty value.
+func newItemWithKey[V any](key []byte) item[V] {
+ return item[V]{key: key}
+}
diff --git a/store/internal/btree/btree_test.go b/store/internal/btree/btree_test.go
index 89b5827e6156..75613ebbb69c 100644
--- a/store/internal/btree/btree_test.go
+++ b/store/internal/btree/btree_test.go
@@ -12,17 +12,20 @@ func TestGetSetDelete(t *testing.T) {
 db := NewBTree[[]byte]()
 
 // A nonexistent key should return nil.
- value := db.Get([]byte("a"))
+ value, found := db.Get([]byte("a"))
 require.Nil(t, value)
+ require.False(t, found)
 
 // Set and get a value.
 db.Set([]byte("a"), []byte{0x01})
 db.Set([]byte("b"), []byte{0x02})
- value = db.Get([]byte("a"))
+ value, found = db.Get([]byte("a"))
 require.Equal(t, []byte{0x01}, value)
+ require.True(t, found)
 
- value = db.Get([]byte("b"))
+ value, found = db.Get([]byte("b"))
 require.Equal(t, []byte{0x02}, value)
+ require.True(t, found)
 
 // Deleting a non-existent value is fine.
 db.Delete([]byte("x"))
 
@@ -30,13 +33,23 @@
 // Delete a value.
db.Delete([]byte("a")) - value = db.Get([]byte("a")) + value, found = db.Get([]byte("a")) require.Nil(t, value) + require.False(t, found) db.Delete([]byte("b")) - value = db.Get([]byte("b")) + value, found = db.Get([]byte("b")) require.Nil(t, value) + require.False(t, found) +} + +func TestNilValue(t *testing.T) { + db := NewBTree[[]byte]() + db.Set([]byte("a"), nil) + value, found := db.Get([]byte("a")) + require.Nil(t, value) + require.True(t, found) } func TestDBIterator(t *testing.T) { diff --git a/store/internal/btreeadaptor.go b/store/internal/btreeadaptor.go index 322e0f4152fe..318c67bfda89 100644 --- a/store/internal/btreeadaptor.go +++ b/store/internal/btreeadaptor.go @@ -20,9 +20,15 @@ func NewBTreeStore[V any](btree btree.BTree[V], isZero func(V) bool, valueLen fu return &BTreeStore[V]{btree, isZero, valueLen} } +func (ts *BTreeStore[V]) Get(key []byte) (value V) { + value, _ = ts.BTree.Get(key) + return +} + // Hash Implements GKVStore. func (ts *BTreeStore[V]) Has(key []byte) bool { - return !ts.isZero(ts.Get(key)) + _, found := ts.BTree.Get(key) + return found } func (ts *BTreeStore[V]) Iterator(start, end []byte) types.GIterator[V] { diff --git a/store/types/store.go b/store/types/store.go index 239e4ae94b3f..19e45dbda43c 100644 --- a/store/types/store.go +++ b/store/types/store.go @@ -352,6 +352,12 @@ func (cid CommitID) String() string { return fmt.Sprintf("CommitID{%v:%X}", cid.Hash, cid.Version) } +// BranchStore +type BranchStore interface { + Clone() BranchStore + Restore(BranchStore) +} + //---------------------------------------- // Store types
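
Note (added for illustration, not part of the patch): the sketch below shows how the Clone/Restore branching introduced above might be used to run a speculative write on top of a cachekv store and roll it back. It assumes a memdb-backed dbadapter.Store as the parent KVStore; Clone, Restore, Has, and Write are the APIs shown in the diff, and Restore consumes the snapshot (per the swapCache comment), so the snapshot must not be reused afterwards.

package main

import (
	"fmt"

	dbm "github.com/cosmos/cosmos-db"

	"cosmossdk.io/store/cachekv"
	"cosmossdk.io/store/dbadapter"
)

func main() {
	// Assumed parent for the example: a KVStore adapter over an in-memory DB.
	parent := dbadapter.Store{DB: dbm.NewMemDB()}
	cache := cachekv.NewStore(parent)

	cache.Set([]byte("a"), []byte{1})

	// Branch: a shallow, copy-on-write snapshot of the current write set.
	snapshot := cache.Clone()

	// Speculative write on the live store.
	cache.Set([]byte("b"), []byte{2})

	// Roll back by restoring the snapshot; the snapshot is consumed here.
	cache.Restore(snapshot)

	fmt.Println(cache.Has([]byte("a"))) // true: written before the branch
	fmt.Println(cache.Has([]byte("b"))) // false: speculative write discarded

	// Flush the surviving writes to the parent store.
	cache.Write()
}

Because Clone only copies the btree's immutable root (copy-on-write), branching is O(1) regardless of how many entries are in the write set, which is what makes nested cache stores cheap compared to the previous map-plus-sorted-cache design.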