From 06986b4003d8405c562bb78243601fc8f6da5efe Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 3 Jul 2024 15:49:17 +0900 Subject: [PATCH 01/50] blockchain: export flush() For NodesBackend and CachedLeavesBackend, flush is exported so that the utreexo indexes can call it directly. --- blockchain/utreexoio.go | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 0c0a6fd9..88c52449 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -122,7 +122,7 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { // If the cache is full, flush the cache then Put // the leaf in. if !m.cache.Put(k, cLeaf) { - m.flush() + m.Flush() m.cache.Put(k, cLeaf) } @@ -140,7 +140,7 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { // Cache the leaf before returning it. if !m.cache.Put(k, utreexobackends.CachedLeaf{Leaf: leaf}) { - m.flush() + m.Flush() m.cache.Put(k, utreexobackends.CachedLeaf{Leaf: leaf}) } return leaf, true @@ -158,7 +158,7 @@ func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { } if int64(m.cache.Length()) > m.maxCacheElem { - m.flush() + m.Flush() } leaf, found := m.cache.Get(k) @@ -171,7 +171,7 @@ func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { // It shouldn't fail here but handle it anyways. if !m.cache.Put(k, l) { - m.flush() + m.Flush() m.cache.Put(k, l) } } else { @@ -183,7 +183,7 @@ func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { // It shouldn't fail here but handle it anyways.
if !m.cache.Put(k, l) { - m.flush() + m.Flush() m.cache.Put(k, l) } } @@ -204,7 +204,7 @@ func (m *NodesBackEnd) Delete(k uint64) { leaf, found := m.cache.Get(k) if !found { if int64(m.cache.Length()) >= m.maxCacheElem { - m.flush() + m.Flush() } } l := utreexobackends.CachedLeaf{ @@ -212,14 +212,14 @@ func (m *NodesBackEnd) Delete(k uint64) { Flags: leaf.Flags | utreexobackends.Removed, } if !m.cache.Put(k, l) { - m.flush() + m.Flush() m.cache.Put(k, l) } } // Length returns the amount of items in the underlying database. func (m *NodesBackEnd) Length() int { - m.flush() + m.Flush() length := 0 iter := m.db.NewIterator(nil, nil) @@ -233,7 +233,7 @@ func (m *NodesBackEnd) Length() int { // ForEach calls the given function for each of the elements in the underlying map. func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { - m.flush() + m.Flush() iter := m.db.NewIterator(nil, nil) for iter.Next() { @@ -258,7 +258,7 @@ func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { } // flush saves all the cached entries to disk and resets the cache map. -func (m *NodesBackEnd) flush() { +func (m *NodesBackEnd) Flush() { if m.maxCacheElem == 0 { return } @@ -282,7 +282,7 @@ func (m *NodesBackEnd) flush() { // Close flushes the cache and closes the underlying database. func (m *NodesBackEnd) Close() error { - m.flush() + m.Flush() return m.db.Close() } @@ -355,7 +355,7 @@ func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { length := m.cache.Length() if int64(length) >= m.maxCacheElem { - m.flush() + m.Flush() } m.cache.Put(k, v) @@ -370,7 +370,7 @@ func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { // Length returns the amount of items in the underlying db and the cache. 
func (m *CachedLeavesBackEnd) Length() int { - m.flush() + m.Flush() length := 0 iter := m.db.NewIterator(nil, nil) @@ -384,7 +384,7 @@ func (m *CachedLeavesBackEnd) Length() int { // ForEach calls the given function for each of the elements in the underlying map. func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error { - m.flush() + m.Flush() iter := m.db.NewIterator(nil, nil) for iter.Next() { @@ -403,7 +403,7 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error } // Flush resets the cache and saves all the key values onto the database. -func (m *CachedLeavesBackEnd) flush() { +func (m *CachedLeavesBackEnd) Flush() { m.cache.ForEach(func(k utreexo.Hash, v uint64) { err := m.dbPut(k, v) if err != nil { @@ -416,6 +416,6 @@ func (m *CachedLeavesBackEnd) flush() { // Close flushes all the cached entries and then closes the underlying database. func (m *CachedLeavesBackEnd) Close() error { - m.flush() + m.Flush() return m.db.Close() } From 48fb55fb6b8f09c80554532361526a9b495a803e Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 3 Jul 2024 15:58:36 +0900 Subject: [PATCH 02/50] blockchain, indexers: use unified database for the accumulator For utreexo bridges, the accumulator state was kept in separate leveldb instances for the nodes and the leaves. This creates issues during recovery because it's hard to ensure that the writes will be atomic. Using the same leveldb backend for both the nodes and the leaves solves this problem. 
--- blockchain/indexers/utreexobackend.go | 46 +++++++-------------------- blockchain/utreexoio.go | 37 +++++++-------------- blockchain/utreexoio_test.go | 33 +++++++++++++++---- 3 files changed, 51 insertions(+), 65 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 36c09851..f01abf86 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -7,10 +7,10 @@ package indexers import ( "bytes" "encoding/binary" - "fmt" "os" "path/filepath" + "github.com/syndtr/goleveldb/leveldb" "github.com/utreexo/utreexo" "github.com/utreexo/utreexod/blockchain" "github.com/utreexo/utreexod/chaincfg" @@ -22,8 +22,6 @@ const ( // utreexoDirName is the name of the directory in which the utreexo state // is stored. utreexoDirName = "utreexostate" - nodesDBDirName = "nodes" - cachedLeavesDBDirName = "cachedleaves" defaultUtreexoFileName = "forest.dat" ) @@ -320,14 +318,17 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) maxNodesMem := maxMemoryUsage * 6 / 10 maxCachedLeavesMem := maxMemoryUsage - maxNodesMem - nodesPath := filepath.Join(basePath, nodesDBDirName) - nodesDB, err := blockchain.InitNodesBackEnd(nodesPath, maxNodesMem) + db, err := leveldb.OpenFile(basePath, nil) if err != nil { return nil, err } - cachedLeavesPath := filepath.Join(basePath, cachedLeavesDBDirName) - cachedLeavesDB, err := blockchain.InitCachedLeavesBackEnd(cachedLeavesPath, maxCachedLeavesMem) + nodesDB, err := blockchain.InitNodesBackEnd(db, maxNodesMem) + if err != nil { + return nil, err + } + + cachedLeavesDB, err := blockchain.InitCachedLeavesBackEnd(db, maxCachedLeavesMem) if err != nil { return nil, err } @@ -351,17 +352,10 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) p.Nodes = nodesDB p.CachedLeaves = cachedLeavesDB closeDB = func() error { - err := nodesDB.Close() - if err != nil { - return err - } + nodesDB.Flush() + 
cachedLeavesDB.Flush() - err = cachedLeavesDB.Close() - if err != nil { - return err - } - - return nil + return db.Close() } } else { log.Infof("loading the utreexo state from disk...") @@ -396,25 +390,9 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) return nil }) - // We want to try to close both of the DBs before returning because of an error. - errStr := "" - err := nodesDB.Close() - if err != nil { - errStr += fmt.Sprintf("Error while closing nodes db. %v", err.Error()) - } - err = cachedLeavesDB.Close() - if err != nil { - errStr += fmt.Sprintf("Error while closing cached leaves db. %v", err.Error()) - } - - // If the err string isn't "", then return the error here. - if errStr != "" { - return fmt.Errorf(errStr) - } - log.Infof("Finished flushing the utreexo state to disk.") - return nil + return db.Close() } } diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 88c52449..fc0efee9 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -53,12 +53,7 @@ type NodesBackEnd struct { // InitNodesBackEnd returns a newly initialized NodesBackEnd which implements // utreexo.NodesInterface. -func InitNodesBackEnd(datadir string, maxTotalMemoryUsage int64) (*NodesBackEnd, error) { - db, err := leveldb.OpenFile(datadir, nil) - if err != nil { - return nil, err - } - +func InitNodesBackEnd(db *leveldb.DB, maxTotalMemoryUsage int64) (*NodesBackEnd, error) { cache, maxCacheElems := utreexobackends.NewNodesMapSlice(maxTotalMemoryUsage) nb := NodesBackEnd{ db: db, @@ -237,6 +232,11 @@ func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { iter := m.db.NewIterator(nil, nil) for iter.Next() { + // If the iterated key is chainhash.HashSize bytes long, it means that the entry is for the cached leaves backend. + // Skip it since it's not relevant here. + if len(iter.Key()) == 32 { + continue + } // Remember that the contents of the returned slice should not be modified, and // only valid until the next call to Next.
k, _ := deserializeVLQ(iter.Key()) @@ -280,13 +280,6 @@ func (m *NodesBackEnd) Flush() { m.cache.ClearMaps() } -// Close flushes the cache and closes the underlying database. -func (m *NodesBackEnd) Close() error { - m.Flush() - - return m.db.Close() -} - var _ utreexo.CachedLeavesInterface = (*CachedLeavesBackEnd)(nil) // CachedLeavesBackEnd implements the CachedLeavesInterface interface. The cache assumes @@ -317,12 +310,7 @@ func (m *CachedLeavesBackEnd) dbGet(k utreexo.Hash) (uint64, bool) { // InitCachedLeavesBackEnd returns a newly initialized CachedLeavesBackEnd which implements // utreexo.CachedLeavesInterface. -func InitCachedLeavesBackEnd(datadir string, maxMemoryUsage int64) (*CachedLeavesBackEnd, error) { - db, err := leveldb.OpenFile(datadir, nil) - if err != nil { - return nil, err - } - +func InitCachedLeavesBackEnd(db *leveldb.DB, maxMemoryUsage int64) (*CachedLeavesBackEnd, error) { cache, maxCacheElem := utreexobackends.NewCachedLeavesMapSlice(maxMemoryUsage) return &CachedLeavesBackEnd{maxCacheElem: maxCacheElem, db: db, cache: cache}, nil } @@ -388,6 +376,11 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error iter := m.db.NewIterator(nil, nil) for iter.Next() { + // If the itered key isn't chainhash.HashSize, it means that the entry is for nodesbackend. + // Skip it since it's not relevant here. + if len(iter.Key()) != chainhash.HashSize { + continue + } // Remember that the contents of the returned slice should not be modified, and // only valid until the next call to Next. k := iter.Key() @@ -413,9 +406,3 @@ func (m *CachedLeavesBackEnd) Flush() { m.cache.ClearMaps() } - -// Close flushes all the cached entries and then closes the underlying database. 
-func (m *CachedLeavesBackEnd) Close() error { - m.Flush() - return m.db.Close() -} diff --git a/blockchain/utreexoio_test.go b/blockchain/utreexoio_test.go index 33ced614..fdfed0a1 100644 --- a/blockchain/utreexoio_test.go +++ b/blockchain/utreexoio_test.go @@ -8,6 +8,7 @@ import ( "sync" "testing" + "github.com/syndtr/goleveldb/leveldb" "github.com/utreexo/utreexo" "github.com/utreexo/utreexod/blockchain/internal/utreexobackends" ) @@ -38,7 +39,11 @@ func TestCachedLeavesBackEnd(t *testing.T) { } for _, test := range tests { - cachedLeavesBackEnd, err := InitCachedLeavesBackEnd(test.tmpDir, test.maxMemUsage) + db, err := leveldb.OpenFile(test.tmpDir, nil) + if err != nil { + t.Fatal(err) + } + cachedLeavesBackEnd, err := InitCachedLeavesBackEnd(db, test.maxMemUsage) if err != nil { t.Fatal(err) } @@ -56,11 +61,17 @@ func TestCachedLeavesBackEnd(t *testing.T) { } // Close and reopen the backend. - err = cachedLeavesBackEnd.Close() + cachedLeavesBackEnd.Flush() + err = db.Close() + if err != nil { + t.Fatal(err) + } + + db, err = leveldb.OpenFile(test.tmpDir, nil) if err != nil { t.Fatal(err) } - cachedLeavesBackEnd, err = InitCachedLeavesBackEnd(test.tmpDir, test.maxMemUsage) + cachedLeavesBackEnd, err = InitCachedLeavesBackEnd(db, test.maxMemUsage) if err != nil { t.Fatal(err) } @@ -172,7 +183,11 @@ func TestNodesBackEnd(t *testing.T) { } for _, test := range tests { - nodesBackEnd, err := InitNodesBackEnd(test.tmpDir, test.maxMemUsage) + db, err := leveldb.OpenFile(test.tmpDir, nil) + if err != nil { + t.Fatal(err) + } + nodesBackEnd, err := InitNodesBackEnd(db, test.maxMemUsage) if err != nil { t.Fatal(err) } @@ -190,11 +205,17 @@ func TestNodesBackEnd(t *testing.T) { } // Close and reopen the backend. 
- err = nodesBackEnd.Close() + nodesBackEnd.Flush() + err = db.Close() + if err != nil { + t.Fatal(err) + } + + db, err = leveldb.OpenFile(test.tmpDir, nil) if err != nil { t.Fatal(err) } - nodesBackEnd, err = InitNodesBackEnd(test.tmpDir, test.maxMemUsage) + nodesBackEnd, err = InitNodesBackEnd(db, test.maxMemUsage) if err != nil { t.Fatal(err) } From 8b0f2da30090df75f230fd5d16fdcb988806897a Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Thu, 4 Jul 2024 16:07:36 +0900 Subject: [PATCH 03/50] blockchain: don't delete from the db directly for cachedleaves On delete, cached leaves backend would delete directly from the database. This behavior is changed and now the deletes are cached until a flush happens. This is a step towards achieving a recoverable accumulator state as now only flushes are able to modify the database. --- blockchain/utreexoio.go | 49 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index fc0efee9..06b2b268 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -6,6 +6,7 @@ package blockchain import ( "fmt" + "math" "github.com/syndtr/goleveldb/leveldb" "github.com/utreexo/utreexo" @@ -325,6 +326,11 @@ func (m *CachedLeavesBackEnd) Get(k utreexo.Hash) (uint64, bool) { if !found { return m.dbGet(k) } + // Even if the entry was found, if the position value is math.MaxUint64, + // then it was already deleted. + if pos == math.MaxUint64 { + return 0, false + } return pos, found } @@ -352,8 +358,30 @@ func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { - m.cache.Delete(k) - m.db.Delete(k[:], nil) + // Delete directly from the database if we don't cache anything. 
+ if m.maxCacheElem == 0 { + err := m.db.Delete(k[:], nil) + if err != nil { + log.Warnf("CachedLeavesBackEnd delete fail. %v", err) + } + + return + } + + _, found := m.cache.Get(k) + if !found { + // Check if we need to flush as we'll be adding an entry to + // the cache. + if int64(m.cache.Length()) >= m.maxCacheElem { + m.Flush() + } + } + + // Try inserting, if it fails, it means we need to flush. + if !m.cache.Put(k, math.MaxUint64) { + m.Flush() + m.cache.Put(k, math.MaxUint64) + } } // Length returns the amount of items in the underlying db and the cache. @@ -397,10 +425,21 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error // Flush resets the cache and saves all the key values onto the database. func (m *CachedLeavesBackEnd) Flush() { + if m.maxCacheElem == 0 { + return + } + m.cache.ForEach(func(k utreexo.Hash, v uint64) { - err := m.dbPut(k, v) - if err != nil { - log.Warnf("CachedLeavesBackEnd dbPut fail. %v", err) + if v == math.MaxUint64 { + err := m.db.Delete(k[:], nil) + if err != nil { + log.Warnf("CachedLeavesBackEnd delete fail. %v", err) + } + } else { + err := m.dbPut(k, v) + if err != nil { + log.Warnf("CachedLeavesBackEnd dbPut fail. %v", err) + } } }) From e9e4cd55c3a64dbf740edf5be532d1decea0b42c Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 5 Jul 2024 16:07:39 +0900 Subject: [PATCH 04/50] blockchain: separate out flush from db close db close was handling both the flushes and the database closes. These are now separated out into 2 different functions. 
--- blockchain/indexers/utreexobackend.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index f01abf86..25352c2f 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -47,6 +47,7 @@ type UtreexoState struct { config *UtreexoConfig state utreexo.Utreexo + flush func() closeDB func() error } @@ -170,6 +171,7 @@ func (idx *UtreexoProofIndex) FlushUtreexoState() error { return err } + idx.utreexoState.flush() return idx.utreexoState.closeDB() } @@ -191,6 +193,7 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoState() error { return err } + idx.utreexoState.flush() return idx.utreexoState.closeDB() } @@ -348,13 +351,15 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) } var closeDB func() error + var flush func() if maxMemoryUsage >= 0 { p.Nodes = nodesDB p.CachedLeaves = cachedLeavesDB - closeDB = func() error { + flush = func() { nodesDB.Flush() cachedLeavesDB.Flush() - + } + closeDB = func() error { return db.Close() } } else { @@ -377,7 +382,7 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) log.Infof("Finished loading the utreexo state from disk.") - closeDB = func() error { + flush = func() { log.Infof("Flushing the utreexo state to disk. 
May take a while...") p.Nodes.ForEach(func(k uint64, v utreexo.Leaf) error { @@ -391,7 +396,8 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) }) log.Infof("Finished flushing the utreexo state to disk.") - + } + closeDB = func() error { return db.Close() } } @@ -399,6 +405,7 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) uState := &UtreexoState{ config: cfg, state: &p, + flush: flush, closeDB: closeDB, } From 454abb00a63ac4d367777909b64bf4033ab2cc3d Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 5 Jul 2024 17:35:56 +0900 Subject: [PATCH 05/50] blockchain: add put and delete functions for nodes and cached leaves backends These newly added functions ask for leveldb txs and now many writes and deletes can be atomically written into the database. --- blockchain/utreexoio.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 06b2b268..4bb4e397 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -142,6 +142,16 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { return leaf, true } +// NodesBackendPut puts a key-value pair in the given leveldb tx. +func NodesBackendPut(tx *leveldb.Transaction, k uint64, v utreexo.Leaf) error { + size := serializeSizeVLQ(k) + buf := make([]byte, size) + putVLQ(buf, k) + + serialized := serializeLeaf(v) + return tx.Put(buf[:], serialized[:], nil) +} + // Put puts the given position and the leaf to the underlying map. func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { if m.maxCacheElem == 0 { @@ -185,6 +195,14 @@ func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { } } +// NodesBackendDelete deletes the corresponding key-value pair from the given leveldb tx. 
+func NodesBackendDelete(tx *leveldb.Transaction, k uint64) error { + size := serializeSizeVLQ(k) + buf := make([]byte, size) + putVLQ(buf, k) + return tx.Delete(buf, nil) +} + // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *NodesBackEnd) Delete(k uint64) { @@ -335,6 +353,14 @@ func (m *CachedLeavesBackEnd) Get(k utreexo.Hash) (uint64, bool) { return pos, found } +// CachedLeavesBackendPut puts a key-value pair in the given leveldb tx. +func CachedLeavesBackendPut(tx *leveldb.Transaction, k utreexo.Hash, v uint64) error { + size := serializeSizeVLQ(v) + buf := make([]byte, size) + putVLQ(buf, v) + return tx.Put(k[:], buf, nil) +} + // Put puts the given data to the underlying cache. If the cache is full, it evicts // the earliest entries to make room. func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { From df12abe0e4ad16c78b456f03adbc11991decb980 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 5 Jul 2024 17:38:38 +0900 Subject: [PATCH 06/50] blockchain, indexers: make the flushes for bridges atomic The flush functions weren't atomic which meant that in an unexpected crash the node would be left in an unrecoverable state. The atomic write is a step closer to achieving recoverable accumulator state for bridges. --- blockchain/indexers/utreexobackend.go | 35 ++++++++++++++++++------ blockchain/utreexoio.go | 38 +++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 25352c2f..c6a1def6 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -384,16 +384,35 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) flush = func() { log.Infof("Flushing the utreexo state to disk. 
May take a while...") - - p.Nodes.ForEach(func(k uint64, v utreexo.Leaf) error { - nodesDB.Put(k, v) - return nil + ldbTx, err := db.OpenTransaction() + if err != nil { + log.Warnf("flush error, failed to open leveldb tx. %v", err) + return + } + + err = p.Nodes.ForEach(func(k uint64, v utreexo.Leaf) error { + return blockchain.NodesBackendPut(ldbTx, k, v) }) - - p.CachedLeaves.ForEach(func(k utreexo.Hash, v uint64) error { - cachedLeavesDB.Put(k, v) - return nil + if err != nil { + ldbTx.Discard() + log.Warnf("flush error. %v", err) + return + } + + err = p.CachedLeaves.ForEach(func(k utreexo.Hash, v uint64) error { + return blockchain.CachedLeavesBackendPut(ldbTx, k, v) }) + if err != nil { + ldbTx.Discard() + log.Warnf("flush error. %v", err) + return + } + + err = ldbTx.Commit() + if err != nil { + log.Warnf("flush error, failed to commit leveldb tx. %v", err) + return + } log.Infof("Finished flushing the utreexo state to disk.") } diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 4bb4e397..dfe01651 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -282,20 +282,34 @@ func (m *NodesBackEnd) Flush() { return } + ldbTx, err := m.db.OpenTransaction() + if err != nil { + log.Warnf("NodesBackEnd flush error. %v", err) + return + } m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) { if v.IsRemoved() { - err := m.dbDel(k) + err := NodesBackendDelete(ldbTx, k) if err != nil { + ldbTx.Discard() log.Warnf("NodesBackEnd flush error. %v", err) + return } } else if v.IsFresh() || v.IsModified() { - err := m.dbPut(k, v.Leaf) + err := NodesBackendPut(ldbTx, k, v.Leaf) if err != nil { + ldbTx.Discard() log.Warnf("NodesBackEnd flush error. %v", err) + return } } }) + err = ldbTx.Commit() + if err != nil { + log.Warnf("NodesBackEnd flush error. Failed to commit leveldb tx. 
%v", err) + } + m.cache.ClearMaps() } @@ -455,19 +469,33 @@ func (m *CachedLeavesBackEnd) Flush() { return } + ldbTx, err := m.db.OpenTransaction() + if err != nil { + log.Warnf("CachedLeavesBackEnd flush error. %v", err) + return + } m.cache.ForEach(func(k utreexo.Hash, v uint64) { if v == math.MaxUint64 { - err := m.db.Delete(k[:], nil) + err = ldbTx.Delete(k[:], nil) if err != nil { + ldbTx.Discard() log.Warnf("CachedLeavesBackEnd delete fail. %v", err) + return } } else { - err := m.dbPut(k, v) + err = CachedLeavesBackendPut(ldbTx, k, v) if err != nil { - log.Warnf("CachedLeavesBackEnd dbPut fail. %v", err) + ldbTx.Discard() + log.Warnf("CachedLeavesBackEnd put fail. %v", err) + return } } }) + err = ldbTx.Commit() + if err != nil { + log.Warnf("CachedLeavesBackEnd flush error. Failed to commit leveldb tx. %v", err) + } + m.cache.ClearMaps() } From bd2ef43a241c2b4962982f20c39c5a919edea3a8 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 5 Jul 2024 18:44:51 +0900 Subject: [PATCH 07/50] blockchain: never write directly to the database The writes to the database now only happen through flushes. Since flushes are atomic, now all the writes to the database are atomic. --- blockchain/utreexoio.go | 76 ------------------------------------ blockchain/utreexoio_test.go | 28 +------------ 2 files changed, 2 insertions(+), 102 deletions(-) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index dfe01651..cc43cb6e 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -65,15 +65,6 @@ func InitNodesBackEnd(db *leveldb.DB, maxTotalMemoryUsage int64) (*NodesBackEnd, return &nb, nil } -// dbPut serializes and puts the key value pair into the database. 
-func (m *NodesBackEnd) dbPut(k uint64, v utreexo.Leaf) error { - var buf [vlqBufSize]byte - size := putVLQ(buf[:], k) - - serialized := serializeLeaf(v) - return m.db.Put(buf[:size], serialized[:], nil) -} - // dbGet fetches the value from the database and deserializes it and returns // the leaf value and a boolean for whether or not it was successful. func (m *NodesBackEnd) dbGet(k uint64) (utreexo.Leaf, bool) { @@ -94,19 +85,8 @@ func (m *NodesBackEnd) dbGet(k uint64) (utreexo.Leaf, bool) { return leaf, true } -// dbDel removes the key from the database. -func (m *NodesBackEnd) dbDel(k uint64) error { - var buf [vlqBufSize]byte - size := putVLQ(buf[:], k) - return m.db.Delete(buf[:size], nil) -} - // Get returns the leaf from the underlying map. func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { - if m.maxCacheElem == 0 { - return m.dbGet(k) - } - // Look it up on the cache first. cLeaf, found := m.cache.Get(k) if found { @@ -154,15 +134,6 @@ func NodesBackendPut(tx *leveldb.Transaction, k uint64, v utreexo.Leaf) error { // Put puts the given position and the leaf to the underlying map. func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { - if m.maxCacheElem == 0 { - err := m.dbPut(k, v) - if err != nil { - log.Warnf("NodesBackEnd dbPut fail. %v", err) - } - - return - } - if int64(m.cache.Length()) > m.maxCacheElem { m.Flush() } @@ -206,15 +177,6 @@ func NodesBackendDelete(tx *leveldb.Transaction, k uint64) error { // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *NodesBackEnd) Delete(k uint64) { - if m.maxCacheElem == 0 { - err := m.dbDel(k) - if err != nil { - log.Warnf("NodesBackEnd dbDel fail. %v", err) - } - - return - } - leaf, found := m.cache.Get(k) if !found { if int64(m.cache.Length()) >= m.maxCacheElem { @@ -278,10 +240,6 @@ func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { // flush saves all the cached entries to disk and resets the cache map. 
func (m *NodesBackEnd) Flush() { - if m.maxCacheElem == 0 { - return - } - ldbTx, err := m.db.OpenTransaction() if err != nil { log.Warnf("NodesBackEnd flush error. %v", err) @@ -323,13 +281,6 @@ type CachedLeavesBackEnd struct { cache utreexobackends.CachedLeavesMapSlice } -// dbPut serializes and puts the key and the value into the database. -func (m *CachedLeavesBackEnd) dbPut(k utreexo.Hash, v uint64) error { - var buf [vlqBufSize]byte - size := putVLQ(buf[:], v) - return m.db.Put(k[:], buf[:size], nil) -} - // dbGet fetches and deserializes the value from the database. func (m *CachedLeavesBackEnd) dbGet(k utreexo.Hash) (uint64, bool) { val, err := m.db.Get(k[:], nil) @@ -350,10 +301,6 @@ func InitCachedLeavesBackEnd(db *leveldb.DB, maxMemoryUsage int64) (*CachedLeave // Get returns the data from the underlying cache or the database. func (m *CachedLeavesBackEnd) Get(k utreexo.Hash) (uint64, bool) { - if m.maxCacheElem == 0 { - return m.dbGet(k) - } - pos, found := m.cache.Get(k) if !found { return m.dbGet(k) @@ -378,15 +325,6 @@ func CachedLeavesBackendPut(tx *leveldb.Transaction, k utreexo.Hash, v uint64) e // Put puts the given data to the underlying cache. If the cache is full, it evicts // the earliest entries to make room. func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { - if m.maxCacheElem == 0 { - err := m.dbPut(k, v) - if err != nil { - log.Warnf("CachedLeavesBackEnd dbPut fail. %v", err) - } - - return - } - length := m.cache.Length() if int64(length) >= m.maxCacheElem { m.Flush() @@ -398,16 +336,6 @@ func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { - // Delete directly from the database if we don't cache anything. - if m.maxCacheElem == 0 { - err := m.db.Delete(k[:], nil) - if err != nil { - log.Warnf("CachedLeavesBackEnd delete fail. 
%v", err) - } - - return - } - _, found := m.cache.Get(k) if !found { // Check if we need to flush as we'll be adding an entry to @@ -465,10 +393,6 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error // Flush resets the cache and saves all the key values onto the database. func (m *CachedLeavesBackEnd) Flush() { - if m.maxCacheElem == 0 { - return - } - ldbTx, err := m.db.OpenTransaction() if err != nil { log.Warnf("CachedLeavesBackEnd flush error. %v", err) diff --git a/blockchain/utreexoio_test.go b/blockchain/utreexoio_test.go index fdfed0a1..8f00f10d 100644 --- a/blockchain/utreexoio_test.go +++ b/blockchain/utreexoio_test.go @@ -20,19 +20,7 @@ func TestCachedLeavesBackEnd(t *testing.T) { }{ { tmpDir: func() string { - return filepath.Join(os.TempDir(), "TestCachedLeavesBackEnd0") - }(), - maxMemUsage: -1, - }, - { - tmpDir: func() string { - return filepath.Join(os.TempDir(), "TestCachedLeavesBackEnd1") - }(), - maxMemUsage: 0, - }, - { - tmpDir: func() string { - return filepath.Join(os.TempDir(), "TestCachedLeavesBackEnd2") + return filepath.Join(os.TempDir(), "TestCachedLeavesBackEnd") }(), maxMemUsage: 1 * 1024 * 1024, }, @@ -164,19 +152,7 @@ func TestNodesBackEnd(t *testing.T) { }{ { tmpDir: func() string { - return filepath.Join(os.TempDir(), "TestNodesBackEnd0") - }(), - maxMemUsage: -1, - }, - { - tmpDir: func() string { - return filepath.Join(os.TempDir(), "TestNodesBackEnd1") - }(), - maxMemUsage: 0, - }, - { - tmpDir: func() string { - return filepath.Join(os.TempDir(), "TestNodesBackEnd2") + return filepath.Join(os.TempDir(), "TestNodesBackEnd") }(), maxMemUsage: 1 * 1024 * 1024, }, From 8312cbdd0f04ae7e522f52ff9075df27069dd7c1 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 5 Jul 2024 18:59:28 +0900 Subject: [PATCH 08/50] indexers: update memory split between nodes and cached leaves backend Memory splits between the nodes and cached leaves backend is updated to 70/30 after monitoring how the memory is used 
during ibd. --- blockchain/indexers/utreexobackend.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index c6a1def6..8abc5c1c 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -316,9 +316,7 @@ func deserializeUndoBlock(serialized []byte) (uint64, []uint64, []utreexo.Hash, func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) (*UtreexoState, error) { p := utreexo.NewMapPollard(true) - // 60% of the memory for the nodes map, 40% for the cache leaves map. - // TODO Totally arbitrary, it there's something better than change it to that. - maxNodesMem := maxMemoryUsage * 6 / 10 + maxNodesMem := maxMemoryUsage * 7 / 10 maxCachedLeavesMem := maxMemoryUsage - maxNodesMem db, err := leveldb.OpenFile(basePath, nil) From a91c37887567d7a1045403f75ad4ed3b8b1c0c45 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 8 Jul 2024 15:51:25 +0900 Subject: [PATCH 09/50] utreexobackends: add overflow map to the cached leaves map The overflow map allows for entries to be added that exceed the amount that the map slice was originally allocated for. This trades off memory usage guarantees with the ability to not flush in the middle of modifying the accumulator. --- .../utreexobackends/cachedleavesmap.go | 34 +++++++++++++++++++ .../utreexobackends/cachedleavesmap_test.go | 9 +++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/blockchain/internal/utreexobackends/cachedleavesmap.go b/blockchain/internal/utreexobackends/cachedleavesmap.go index f18111bd..9adfbe62 100644 --- a/blockchain/internal/utreexobackends/cachedleavesmap.go +++ b/blockchain/internal/utreexobackends/cachedleavesmap.go @@ -26,6 +26,9 @@ type CachedLeavesMapSlice struct { // maps are the underlying maps in the slice of maps. maps []map[utreexo.Hash]uint64 + // overflow puts the overflowed entries. 
+ overflow map[utreexo.Hash]uint64 + // maxEntries is the maximum amount of elemnts that the map is allocated for. maxEntries []int @@ -46,6 +49,8 @@ func (ms *CachedLeavesMapSlice) Length() int { l += len(m) } + l += len(ms.overflow) + return l } @@ -67,6 +72,13 @@ func (ms *CachedLeavesMapSlice) Get(k utreexo.Hash) (uint64, bool) { } } + if len(ms.overflow) > 0 { + v, found = ms.overflow[k] + if found { + return v, found + } + } + return 0, false } @@ -88,6 +100,14 @@ func (ms *CachedLeavesMapSlice) Put(k utreexo.Hash, v uint64) bool { } } + if len(ms.overflow) > 0 { + _, found := ms.overflow[k] + if found { + ms.overflow[k] = v + return true + } + } + for i, maxNum := range ms.maxEntries { m := ms.maps[i] if len(m) >= maxNum { @@ -101,6 +121,8 @@ func (ms *CachedLeavesMapSlice) Put(k utreexo.Hash, v uint64) bool { return true // Return as we were successful in adding the entry. } + ms.overflow[k] = v + // We only reach this code if we've failed to insert into the map above as // all the current maps were full. 
return false @@ -117,6 +139,8 @@ func (ms *CachedLeavesMapSlice) Delete(k utreexo.Hash) { for i := 0; i < len(ms.maps); i++ { delete(ms.maps[i], k) } + + delete(ms.overflow, k) } // DeleteMaps deletes all maps and allocate new ones with the maxEntries defined in @@ -131,6 +155,8 @@ func (ms *CachedLeavesMapSlice) DeleteMaps() { for i := range ms.maxEntries { ms.maps[i] = make(map[utreexo.Hash]uint64, ms.maxEntries[i]) } + + ms.overflow = make(map[utreexo.Hash]uint64) } // ClearMaps clears all maps @@ -159,6 +185,12 @@ func (ms *CachedLeavesMapSlice) ForEach(fn func(utreexo.Hash, uint64)) { fn(k, v) } } + + if len(ms.overflow) > 0 { + for k, v := range ms.overflow { + fn(k, v) + } + } } // createMaps creates a slice of maps and returns the total count that the maps @@ -187,6 +219,8 @@ func (ms *CachedLeavesMapSlice) createMaps(maxMemoryUsage int64) int64 { ms.maps[i] = make(map[utreexo.Hash]uint64, ms.maxEntries[i]) } + ms.overflow = make(map[utreexo.Hash]uint64) + return int64(totalElemCount) } diff --git a/blockchain/internal/utreexobackends/cachedleavesmap_test.go b/blockchain/internal/utreexobackends/cachedleavesmap_test.go index 50735df0..f4fb8c11 100644 --- a/blockchain/internal/utreexobackends/cachedleavesmap_test.go +++ b/blockchain/internal/utreexobackends/cachedleavesmap_test.go @@ -62,7 +62,7 @@ func TestCachedLeaveMapSliceDuplicates(t *testing.T) { } // Make sure the length of the map is 1 less than the max elems. 
- if m.Length() != int(maxElems)-1 { + if m.Length()-len(m.overflow) != int(maxElems)-1 { t.Fatalf("expected length of %v but got %v", maxElems-1, m.Length()) } @@ -71,8 +71,13 @@ func TestCachedLeaveMapSliceDuplicates(t *testing.T) { if !m.Put(uint64ToHash(0), 0) { t.Fatalf("didn't expect error but unsuccessfully called put") } - if m.Length() != int(maxElems) { + if m.Length()-len(m.overflow) != int(maxElems) { t.Fatalf("expected length of %v but got %v", maxElems, m.Length()) } + + if len(m.overflow) != 1 { + t.Fatalf("expected length of %v but got %v", + 1, len(m.overflow)) + } } From 8160618dc8d9ac7f45477dd466e55909d92b114a Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 8 Jul 2024 16:03:59 +0900 Subject: [PATCH 10/50] utreexobackends: add overflow map to nodesmap The overflow map allows for entries to be added that exceed the amount that the map slice was originally allocated for. This trades off memory usage guarantees with the ability to not flush in the middle of modifying the accumulator. --- .../internal/utreexobackends/nodesmap.go | 36 ++++++++++++++++++- .../internal/utreexobackends/nodesmap_test.go | 9 +++-- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/blockchain/internal/utreexobackends/nodesmap.go b/blockchain/internal/utreexobackends/nodesmap.go index 0c2a753c..80c07035 100644 --- a/blockchain/internal/utreexobackends/nodesmap.go +++ b/blockchain/internal/utreexobackends/nodesmap.go @@ -64,6 +64,9 @@ type NodesMapSlice struct { // maps are the underlying maps in the slice of maps. maps []map[uint64]CachedLeaf + // overflow puts the overflowed entries. + overflow map[uint64]CachedLeaf + // maxEntries is the maximum amount of elemnts that the map is allocated for. 
maxEntries []int @@ -84,6 +87,8 @@ func (ms *NodesMapSlice) Length() int { l += len(m) } + l += len(ms.overflow) + return l } @@ -105,7 +110,14 @@ func (ms *NodesMapSlice) Get(k uint64) (CachedLeaf, bool) { } } - return v, found + if len(ms.overflow) > 0 { + v, found = ms.overflow[k] + if found { + return v, found + } + } + + return v, false } // put puts the keys and the values into one of the maps in the map slice. If the @@ -126,6 +138,14 @@ func (ms *NodesMapSlice) Put(k uint64, v CachedLeaf) bool { } } + if len(ms.overflow) > 0 { + _, found := ms.overflow[k] + if found { + ms.overflow[k] = v + return true + } + } + for i, maxNum := range ms.maxEntries { m := ms.maps[i] if len(m) >= maxNum { @@ -139,6 +159,8 @@ func (ms *NodesMapSlice) Put(k uint64, v CachedLeaf) bool { return true // Return as we were successful in adding the entry. } + ms.overflow[k] = v + // We only reach this code if we've failed to insert into the map above as // all the current maps were full. return false @@ -155,6 +177,8 @@ func (ms *NodesMapSlice) delete(k uint64) { for i := 0; i < len(ms.maps); i++ { delete(ms.maps[i], k) } + + delete(ms.overflow, k) } // DeleteMaps deletes all maps and allocate new ones with the maxEntries defined in @@ -168,6 +192,8 @@ func (ms *NodesMapSlice) DeleteMaps() { for i := range ms.maxEntries { ms.maps[i] = make(map[uint64]CachedLeaf, ms.maxEntries[i]) } + + ms.overflow = make(map[uint64]CachedLeaf) } // ClearMaps clears all maps @@ -196,6 +222,12 @@ func (ms *NodesMapSlice) ForEach(fn func(uint64, CachedLeaf)) { fn(k, v) } } + + if len(ms.overflow) > 0 { + for k, v := range ms.overflow { + fn(k, v) + } + } } // createMaps creates a slice of maps and returns the total count that the maps @@ -224,6 +256,8 @@ func (ms *NodesMapSlice) createMaps(maxMemoryUsage int64) int64 { ms.maps[i] = make(map[uint64]CachedLeaf, ms.maxEntries[i]) } + ms.overflow = make(map[uint64]CachedLeaf) + return int64(totalElemCount) } diff --git 
a/blockchain/internal/utreexobackends/nodesmap_test.go b/blockchain/internal/utreexobackends/nodesmap_test.go index de962924..19230c61 100644 --- a/blockchain/internal/utreexobackends/nodesmap_test.go +++ b/blockchain/internal/utreexobackends/nodesmap_test.go @@ -50,7 +50,7 @@ func TestNodesMapSliceDuplicates(t *testing.T) { } // Make sure the length of the map is 1 less than the max elems. - if m.Length() != int(maxElems)-1 { + if m.Length()-len(m.overflow) != int(maxElems)-1 { t.Fatalf("expected length of %v but got %v", maxElems-1, m.Length()) } @@ -59,8 +59,13 @@ func TestNodesMapSliceDuplicates(t *testing.T) { if !m.Put(0, CachedLeaf{}) { t.Fatalf("didn't expect error but unsuccessfully called put") } - if m.Length() != int(maxElems) { + if m.Length()-len(m.overflow) != int(maxElems) { t.Fatalf("expected length of %v but got %v", maxElems, m.Length()) } + + if len(m.overflow) != 1 { + t.Fatalf("expected length of %v but got %v", + 1, len(m.overflow)) + } } From a714247a4b63015fd291551d2e240ff230e7def8 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 8 Jul 2024 19:19:50 +0900 Subject: [PATCH 11/50] indexers, main: separate out flushing and closing for utreexostates UtreexoState flush used to flush and close the database. Now the closing of the database is separated from the flushing so that the flush function can be used elsewhere besides when the node is shutting down. 
--- blockchain/indexers/manager.go | 8 ++++---- blockchain/indexers/utreexobackend.go | 12 ++++++++++++ server.go | 4 ++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/blockchain/indexers/manager.go b/blockchain/indexers/manager.go index 45437d7f..6166d147 100644 --- a/blockchain/indexers/manager.go +++ b/blockchain/indexers/manager.go @@ -472,12 +472,12 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) for _, indexer := range m.enabledIndexes { switch idxType := indexer.(type) { case *UtreexoProofIndex: - err := idxType.FlushUtreexoState() + err := idxType.CloseUtreexoState() if err != nil { log.Errorf("Error while flushing utreexo state: %v", err) } case *FlatUtreexoProofIndex: - err := idxType.FlushUtreexoState() + err := idxType.CloseUtreexoState() if err != nil { log.Errorf("Error while flushing utreexo state for flat utreexo proof index: %v", err) } @@ -523,12 +523,12 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) for _, indexer := range m.enabledIndexes { switch idxType := indexer.(type) { case *UtreexoProofIndex: - err := idxType.FlushUtreexoState() + err := idxType.CloseUtreexoState() if err != nil { log.Errorf("Error while flushing utreexo state: %v", err) } case *FlatUtreexoProofIndex: - err := idxType.FlushUtreexoState() + err := idxType.CloseUtreexoState() if err != nil { log.Errorf("Error while flushing utreexo state for flat utreexo proof index: %v", err) } diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 8abc5c1c..9544a3b6 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -172,6 +172,12 @@ func (idx *UtreexoProofIndex) FlushUtreexoState() error { } idx.utreexoState.flush() + return nil +} + +// CloseUtreexoState flushes and closes the utreexo database state. 
+func (idx *UtreexoProofIndex) CloseUtreexoState() error { + idx.FlushUtreexoState() return idx.utreexoState.closeDB() } @@ -194,6 +200,12 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoState() error { } idx.utreexoState.flush() + return nil +} + +// CloseUtreexoState flushes and closes the utreexo database state. +func (idx *FlatUtreexoProofIndex) CloseUtreexoState() error { + idx.FlushUtreexoState() return idx.utreexoState.closeDB() } diff --git a/server.go b/server.go index 74fcc96f..cccdde27 100644 --- a/server.go +++ b/server.go @@ -2574,7 +2574,7 @@ out: // If utreexoProofIndex option is on, flush it after closing down syncManager. if s.utreexoProofIndex != nil { - err := s.utreexoProofIndex.FlushUtreexoState() + err := s.utreexoProofIndex.CloseUtreexoState() if err != nil { btcdLog.Errorf("Error while flushing utreexo state: %v", err) } @@ -2582,7 +2582,7 @@ out: // If flatUtreexoProofIndex option is on, flush it after closing down syncManager. if s.flatUtreexoProofIndex != nil { - err := s.flatUtreexoProofIndex.FlushUtreexoState() + err := s.flatUtreexoProofIndex.CloseUtreexoState() if err != nil { btcdLog.Errorf("Error while flushing utreexo state: %v", err) } From 4c333a2c2290fde370171e4c88e8bc536d620888 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 9 Jul 2024 13:53:45 +0900 Subject: [PATCH 12/50] utreexobackends: add Overflowed() method to the map slices The overflowed method allows for callers to check if the node maps and the cached leaves slice is overallocated. 
--- blockchain/internal/utreexobackends/cachedleavesmap.go | 5 +++++ blockchain/internal/utreexobackends/nodesmap.go | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/blockchain/internal/utreexobackends/cachedleavesmap.go b/blockchain/internal/utreexobackends/cachedleavesmap.go index 9adfbe62..6641e8ec 100644 --- a/blockchain/internal/utreexobackends/cachedleavesmap.go +++ b/blockchain/internal/utreexobackends/cachedleavesmap.go @@ -224,6 +224,11 @@ func (ms *CachedLeavesMapSlice) createMaps(maxMemoryUsage int64) int64 { return int64(totalElemCount) } +// Overflowed returns true if the map slice overflowed. +func (ms *CachedLeavesMapSlice) Overflowed() bool { + return len(ms.overflow) > 0 +} + // NewCachedLeavesMapSlice returns a new CachedLeavesMapSlice and the total amount of elements // that the map slice can accomodate. func NewCachedLeavesMapSlice(maxTotalMemoryUsage int64) (CachedLeavesMapSlice, int64) { diff --git a/blockchain/internal/utreexobackends/nodesmap.go b/blockchain/internal/utreexobackends/nodesmap.go index 80c07035..461970dc 100644 --- a/blockchain/internal/utreexobackends/nodesmap.go +++ b/blockchain/internal/utreexobackends/nodesmap.go @@ -261,6 +261,11 @@ func (ms *NodesMapSlice) createMaps(maxMemoryUsage int64) int64 { return int64(totalElemCount) } +// Overflowed returns true if the map slice overflowed. +func (ms *NodesMapSlice) Overflowed() bool { + return len(ms.overflow) > 0 +} + // NewNodesMapSlice returns a new NodesMapSlice and the total amount of elements // that the map slice can accomodate. func NewNodesMapSlice(maxTotalMemoryUsage int64) (NodesMapSlice, int64) { From 10c929ff8cc2fa23139b5d264f719bdc59cd8ed3 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 9 Jul 2024 14:25:06 +0900 Subject: [PATCH 13/50] blockchain: don't flush on put, get, and delete Getting rid of flushes on put, get and delete. This is another step closer to making the accumulator state recoverable on crashes. 
--- blockchain/utreexoio.go | 62 ++++++----------------------------------- 1 file changed, 9 insertions(+), 53 deletions(-) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index cc43cb6e..8aa31b93 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -95,12 +95,7 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { return utreexo.Leaf{}, false } - // If the cache is full, flush the cache then Put - // the leaf in. - if !m.cache.Put(k, cLeaf) { - m.Flush() - m.cache.Put(k, cLeaf) - } + m.cache.Put(k, cLeaf) // If we found it, return here. return cLeaf.Leaf, true @@ -115,10 +110,8 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { } // Cache the leaf before returning it. - if !m.cache.Put(k, utreexobackends.CachedLeaf{Leaf: leaf}) { - m.Flush() - m.cache.Put(k, utreexobackends.CachedLeaf{Leaf: leaf}) - } + m.cache.Put(k, utreexobackends.CachedLeaf{Leaf: leaf}) + return leaf, true } @@ -134,10 +127,6 @@ func NodesBackendPut(tx *leveldb.Transaction, k uint64, v utreexo.Leaf) error { // Put puts the given position and the leaf to the underlying map. func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { - if int64(m.cache.Length()) > m.maxCacheElem { - m.Flush() - } - leaf, found := m.cache.Get(k) if found { leaf.Flags &^= utreexobackends.Removed @@ -146,11 +135,7 @@ func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { Flags: leaf.Flags | utreexobackends.Modified, } - // It shouldn't fail here but handle it anyways. - if !m.cache.Put(k, l) { - m.Flush() - m.cache.Put(k, l) - } + m.cache.Put(k, l) } else { // If the key isn't found, mark it as fresh. l := utreexobackends.CachedLeaf{ @@ -158,11 +143,7 @@ func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { Flags: utreexobackends.Fresh, } - // It shouldn't fail here but handle it anyways. 
- if !m.cache.Put(k, l) { - m.Flush() - m.cache.Put(k, l) - } + m.cache.Put(k, l) } } @@ -177,20 +158,13 @@ func NodesBackendDelete(tx *leveldb.Transaction, k uint64) error { // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *NodesBackEnd) Delete(k uint64) { - leaf, found := m.cache.Get(k) - if !found { - if int64(m.cache.Length()) >= m.maxCacheElem { - m.Flush() - } - } + leaf, _ := m.cache.Get(k) l := utreexobackends.CachedLeaf{ Leaf: leaf.Leaf, Flags: leaf.Flags | utreexobackends.Removed, } - if !m.cache.Put(k, l) { - m.Flush() - m.cache.Put(k, l) - } + + m.cache.Put(k, l) } // Length returns the amount of items in the underlying database. @@ -325,31 +299,13 @@ func CachedLeavesBackendPut(tx *leveldb.Transaction, k utreexo.Hash, v uint64) e // Put puts the given data to the underlying cache. If the cache is full, it evicts // the earliest entries to make room. func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { - length := m.cache.Length() - if int64(length) >= m.maxCacheElem { - m.Flush() - } - m.cache.Put(k, v) } // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { - _, found := m.cache.Get(k) - if !found { - // Check if we need to flush as we'll be adding an entry to - // the cache. - if int64(m.cache.Length()) >= m.maxCacheElem { - m.Flush() - } - } - - // Try inserting, if it fails, it means we need to flush. - if !m.cache.Put(k, math.MaxUint64) { - m.Flush() - m.cache.Put(k, math.MaxUint64) - } + m.cache.Put(k, math.MaxUint64) } // Length returns the amount of items in the underlying db and the cache. From a8bd6f38fbade633164f8908cc7381384845cde9 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 9 Jul 2024 16:42:58 +0900 Subject: [PATCH 14/50] blockchain: don't flush on length and foreach Getting rid of flushes on length and foreach. 
This is another step closer to making the accumulator state recoverable on crashes. --- blockchain/utreexoio.go | 75 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 8aa31b93..b64300c1 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -169,11 +169,28 @@ func (m *NodesBackEnd) Delete(k uint64) { // Length returns the amount of items in the underlying database. func (m *NodesBackEnd) Length() int { - m.Flush() - length := 0 + m.cache.ForEach(func(u uint64, cl utreexobackends.CachedLeaf) { + // Only count the entry if it's not removed and it's not already + // in the database. + if !cl.IsRemoved() && cl.IsFresh() { + length++ + } + }) + iter := m.db.NewIterator(nil, nil) for iter.Next() { + // If the itered key is chainhash.HashSize, it means that the entry is for cachedleavesbackend. + // Skip it since it's not relevant here. + if len(iter.Key()) == 32 { + continue + } + k, _ := deserializeVLQ(iter.Key()) + val, found := m.cache.Get(k) + if found && val.IsRemoved() { + // Skip if the key-value pair has already been removed in the cache. + continue + } length++ } iter.Release() @@ -183,7 +200,13 @@ func (m *NodesBackEnd) Length() int { // ForEach calls the given function for each of the elements in the underlying map. func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { - m.Flush() + m.cache.ForEach(func(u uint64, cl utreexobackends.CachedLeaf) { + // Only operate on the entry if it's not removed and it's not already + // in the database. + if !cl.IsRemoved() && cl.IsFresh() { + fn(u, cl.Leaf) + } + }) iter := m.db.NewIterator(nil, nil) for iter.Next() { @@ -195,6 +218,11 @@ func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { // Remember that the contents of the returned slice should not be modified, and // only valid until the next call to Next.
k, _ := deserializeVLQ(iter.Key()) + val, found := m.cache.Get(k) + if found && val.IsRemoved() { + // Skip if the key-value pair has already been removed in the cache. + continue + } value := iter.Value() if len(value) != leafLength { @@ -310,11 +338,31 @@ func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { // Length returns the amount of items in the underlying db and the cache. func (m *CachedLeavesBackEnd) Length() int { - m.Flush() - length := 0 + m.cache.ForEach(func(k utreexo.Hash, v uint64) { + // Only operate on the entry if it's not removed and it's not already + // in the database. + if v != math.MaxUint64 { + _, found := m.dbGet(k) + if !found { + length++ + } + } + }) iter := m.db.NewIterator(nil, nil) for iter.Next() { + // If the itered key isn't chainhash.HashSize, it means that the entry is for nodesbackend. + // Skip it since it's not relevant here. + if len(iter.Key()) != chainhash.HashSize { + continue + } + k := iter.Key() + val, found := m.cache.Get(*(*[chainhash.HashSize]byte)(k)) + if found && val == math.MaxUint64 { + // Skip if the key-value pair has already been removed in the cache. + continue + } + length++ } iter.Release() @@ -324,8 +372,16 @@ func (m *CachedLeavesBackEnd) Length() int { // ForEach calls the given function for each of the elements in the underlying map. func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error { - m.Flush() - + m.cache.ForEach(func(k utreexo.Hash, v uint64) { + // Only operate on the entry if it's not removed and it's not already + // in the database. + if v != math.MaxUint64 { + _, found := m.dbGet(k) + if !found { + fn(k, v) + } + } + }) iter := m.db.NewIterator(nil, nil) for iter.Next() { // If the itered key isn't chainhash.HashSize, it means that the entry is for nodesbackend.
@@ -336,6 +392,11 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error // Remember that the contents of the returned slice should not be modified, and // only valid until the next call to Next. k := iter.Key() + val, found := m.cache.Get(*(*[chainhash.HashSize]byte)(k)) + if found && val == math.MaxUint64 { + // Skip if the key-value pair has already been removed in the cache. + continue + } v, _ := deserializeVLQ(iter.Value()) err := fn(*(*[chainhash.HashSize]byte)(k), v) From ea178d75b60cd8716ee869c760e4d96f47323e9c Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 9 Jul 2024 17:12:26 +0900 Subject: [PATCH 15/50] indexers, blockchain: require leveldb transaction on Flush For cached leaves backend and nodes backend, the flush functions created their own leveldb transaction and wrote to the database. This meant that one couldn't guarantee atomicity when flushing the utreexo accumulator. Requiring the leveldb transaction allows for the caller to generate a single leveldb transaction and guarantee atomicity. --- blockchain/indexers/utreexobackend.go | 16 +++++++++++++-- blockchain/utreexoio.go | 28 ++++----------------------- blockchain/utreexoio_test.go | 20 +++++++++++++++++-- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 9544a3b6..bcced76d 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -366,8 +366,20 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) p.Nodes = nodesDB p.CachedLeaves = cachedLeavesDB flush = func() { - nodesDB.Flush() - cachedLeavesDB.Flush() + log.Infof("Flushing the utreexo state to disk...") + ldbTx, err := db.OpenTransaction() + if err != nil { + log.Warnf("error while opening transaction. 
%v", err) + } + + nodesDB.Flush(ldbTx) + cachedLeavesDB.Flush(ldbTx) + + err = ldbTx.Commit() + if err != nil { + log.Warnf("error while committing transaction. %v", err) + } + log.Infof("Finished flushing the utreexo state to disk.") } closeDB = func() error { return db.Close() diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index b64300c1..3ac8d4a8 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -241,12 +241,7 @@ func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { } // flush saves all the cached entries to disk and resets the cache map. -func (m *NodesBackEnd) Flush() { - ldbTx, err := m.db.OpenTransaction() - if err != nil { - log.Warnf("NodesBackEnd flush error. %v", err) - return - } +func (m *NodesBackEnd) Flush(ldbTx *leveldb.Transaction) { m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) { if v.IsRemoved() { err := NodesBackendDelete(ldbTx, k) @@ -265,11 +260,6 @@ func (m *NodesBackEnd) Flush() { } }) - err = ldbTx.Commit() - if err != nil { - log.Warnf("NodesBackEnd flush error. Failed to commit leveldb tx. %v", err) - } - m.cache.ClearMaps() } @@ -409,22 +399,17 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error } // Flush resets the cache and saves all the key values onto the database. -func (m *CachedLeavesBackEnd) Flush() { - ldbTx, err := m.db.OpenTransaction() - if err != nil { - log.Warnf("CachedLeavesBackEnd flush error. %v", err) - return - } +func (m *CachedLeavesBackEnd) Flush(ldbTx *leveldb.Transaction) { m.cache.ForEach(func(k utreexo.Hash, v uint64) { if v == math.MaxUint64 { - err = ldbTx.Delete(k[:], nil) + err := ldbTx.Delete(k[:], nil) if err != nil { ldbTx.Discard() log.Warnf("CachedLeavesBackEnd delete fail. %v", err) return } } else { - err = CachedLeavesBackendPut(ldbTx, k, v) + err := CachedLeavesBackendPut(ldbTx, k, v) if err != nil { ldbTx.Discard() log.Warnf("CachedLeavesBackEnd put fail. 
%v", err) @@ -433,10 +418,5 @@ func (m *CachedLeavesBackEnd) Flush() { } }) - err = ldbTx.Commit() - if err != nil { - log.Warnf("CachedLeavesBackEnd flush error. Failed to commit leveldb tx. %v", err) - } - m.cache.ClearMaps() } diff --git a/blockchain/utreexoio_test.go b/blockchain/utreexoio_test.go index 8f00f10d..e0f1361f 100644 --- a/blockchain/utreexoio_test.go +++ b/blockchain/utreexoio_test.go @@ -48,8 +48,16 @@ func TestCachedLeavesBackEnd(t *testing.T) { cachedLeavesBackEnd.Put(hash, i) } + ldbTx, err := db.OpenTransaction() + if err != nil { + t.Fatal(err) + } // Close and reopen the backend. - cachedLeavesBackEnd.Flush() + cachedLeavesBackEnd.Flush(ldbTx) + err = ldbTx.Commit() + if err != nil { + t.Fatal(err) + } err = db.Close() if err != nil { t.Fatal(err) @@ -180,8 +188,16 @@ func TestNodesBackEnd(t *testing.T) { nodesBackEnd.Put(i, utreexo.Leaf{Hash: hash}) } + ldbTx, err := db.OpenTransaction() + if err != nil { + t.Fatal(err) + } // Close and reopen the backend. - nodesBackEnd.Flush() + nodesBackEnd.Flush(ldbTx) + err = ldbTx.Commit() + if err != nil { + t.Fatal(err) + } err = db.Close() if err != nil { t.Fatal(err) From 3d608f293b9d543f7732c2ed366592e93aaacc4b Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 9 Jul 2024 17:50:03 +0900 Subject: [PATCH 16/50] indexers, blockchain: add isFlushNeeded for utreexo indexes The caches used for utreexo indexes can now overflow. The newly added method allows for the caller to check if the caches have overflowed and thus require a flush. 
--- blockchain/indexers/utreexobackend.go | 20 ++++++++++++++------ blockchain/utreexoio.go | 10 ++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index bcced76d..9ff6130c 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -47,8 +47,9 @@ type UtreexoState struct { config *UtreexoConfig state utreexo.Utreexo - flush func() - closeDB func() error + isFlushNeeded func() bool + flush func() + closeDB func() error } // utreexoBasePath returns the base path of where the utreexo state should be @@ -362,6 +363,7 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) var closeDB func() error var flush func() + var isFlushNeeded func() bool if maxMemoryUsage >= 0 { p.Nodes = nodesDB p.CachedLeaves = cachedLeavesDB @@ -381,6 +383,11 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) } log.Infof("Finished flushing the utreexo state to disk.") } + isFlushNeeded = func() bool { + nodesNeedsFlush := nodesDB.IsFlushNeeded() + leavesNeedsFlush := cachedLeavesDB.IsFlushNeeded() + return nodesNeedsFlush && leavesNeedsFlush + } closeDB = func() error { return db.Close() } @@ -444,10 +451,11 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) } uState := &UtreexoState{ - config: cfg, - state: &p, - flush: flush, - closeDB: closeDB, + config: cfg, + state: &p, + isFlushNeeded: isFlushNeeded, + flush: flush, + closeDB: closeDB, } return uState, err diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 3ac8d4a8..44f65061 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -240,6 +240,11 @@ func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { return iter.Error() } +// IsFlushNeeded returns true if the backend needs to be flushed. 
+func (m *NodesBackEnd) IsFlushNeeded() bool { + return m.cache.Overflowed() +} + // flush saves all the cached entries to disk and resets the cache map. func (m *NodesBackEnd) Flush(ldbTx *leveldb.Transaction) { m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) { @@ -398,6 +403,11 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error return iter.Error() } +// IsFlushNeeded returns true if the backend needs to be flushed. +func (m *CachedLeavesBackEnd) IsFlushNeeded() bool { + return m.cache.Overflowed() +} + // Flush resets the cache and saves all the key values onto the database. func (m *CachedLeavesBackEnd) Flush(ldbTx *leveldb.Transaction) { m.cache.ForEach(func(k utreexo.Hash, v uint64) { From eb79f3bd20ad0005d150cc0a7738fe83dd9ef71e Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 9 Jul 2024 17:53:03 +0900 Subject: [PATCH 17/50] indexers: add and call flushIfNeeded method FlushIfNeeded only flushes the utreexo indexes if the cache is full. It's called at the end of every block connect. --- blockchain/indexers/flatutreexoproofindex.go | 1 + blockchain/indexers/utreexobackend.go | 14 ++++++++++++++ blockchain/indexers/utreexoproofindex.go | 1 + 3 files changed, 16 insertions(+) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index 72a77463..32601352 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -317,6 +317,7 @@ func (idx *FlatUtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil. if err != nil { return err } + idx.FlushUtreexoStateIfNeeded() // Don't store proofs if the node is pruned. 
if idx.pruned { diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 9ff6130c..65c39f94 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -154,6 +154,13 @@ func (idx *FlatUtreexoProofIndex) FetchUtreexoState(blockHeight int32) ([]*chain return chainhashRoots, stump.NumLeaves, nil } +// FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. +func (idx *UtreexoProofIndex) FlushUtreexoStateIfNeeded() { + if idx.utreexoState.isFlushNeeded() { + idx.FlushUtreexoState() + } +} + // FlushUtreexoState saves the utreexo state to disk. func (idx *UtreexoProofIndex) FlushUtreexoState() error { basePath := utreexoBasePath(idx.utreexoState.config) @@ -182,6 +189,13 @@ func (idx *UtreexoProofIndex) CloseUtreexoState() error { return idx.utreexoState.closeDB() } +// FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. +func (idx *FlatUtreexoProofIndex) FlushUtreexoStateIfNeeded() { + if idx.utreexoState.isFlushNeeded() { + idx.FlushUtreexoState() + } +} + // FlushUtreexoState saves the utreexo state to disk. func (idx *FlatUtreexoProofIndex) FlushUtreexoState() error { basePath := utreexoBasePath(idx.utreexoState.config) diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 27224899..955bebfa 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -299,6 +299,7 @@ func (idx *UtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil.Bloc if err != nil { return err } + idx.FlushUtreexoStateIfNeeded() return nil } From b8aad8f9b777f642b9e6397ba098708c7276ca5e Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 14:06:27 +0900 Subject: [PATCH 18/50] blockchain, indexers: add usage stats for the caches The added usage stats help in monitoring how much each cache was utilized on flushes.
--- blockchain/indexers/utreexobackend.go | 10 ++++++++++ blockchain/utreexoio.go | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 65c39f94..1a4ec809 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -388,6 +388,16 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) log.Warnf("error while opening transaction. %v", err) } + nodesUsed, nodesCapacity := nodesDB.UsageStats() + log.Debugf("Utreexo index nodesDB cache usage: %d/%d (%v%%)\n", + nodesUsed, nodesCapacity, + float64(nodesUsed)/float64(nodesCapacity)) + + cachedLeavesUsed, cachedLeavesCapacity := cachedLeavesDB.UsageStats() + log.Debugf("Utreexo index cachedLeavesDB cache usage: %d/%d (%v%%)\n", + cachedLeavesUsed, cachedLeavesCapacity, + float64(cachedLeavesUsed)/float64(cachedLeavesCapacity)) + nodesDB.Flush(ldbTx) cachedLeavesDB.Flush(ldbTx) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 44f65061..1e67bf16 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -245,6 +245,11 @@ func (m *NodesBackEnd) IsFlushNeeded() bool { return m.cache.Overflowed() } +// UsageStats returns the currently cached elements and the total amount the cache can hold. +func (m *NodesBackEnd) UsageStats() (int64, int64) { + return int64(m.cache.Length()), m.maxCacheElem +} + // flush saves all the cached entries to disk and resets the cache map. func (m *NodesBackEnd) Flush(ldbTx *leveldb.Transaction) { m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) { @@ -408,6 +413,11 @@ func (m *CachedLeavesBackEnd) IsFlushNeeded() bool { return m.cache.Overflowed() } +// UsageStats returns the currently cached elements and the total amount the cache can hold. 
+func (m *CachedLeavesBackEnd) UsageStats() (int64, int64) { + return int64(m.cache.Length()), m.maxCacheElem +} + // Flush resets the cache and saves all the key values onto the database. func (m *CachedLeavesBackEnd) Flush(ldbTx *leveldb.Transaction) { m.cache.ForEach(func(k utreexo.Hash, v uint64) { From a83cce23c14519aebe8b01dac34d2c992a207d61 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 14:28:38 +0900 Subject: [PATCH 19/50] indexers, main: require best block hash on utreexo index flushes The best hash will be written along with the numleaves of the utreexo accumulator so that it'll be used to mark that the accumulator state on disk is at least consistent to that block. --- blockchain/indexers/flatutreexoproofindex.go | 2 +- blockchain/indexers/manager.go | 8 ++++---- blockchain/indexers/utreexobackend.go | 20 ++++++++++---------- blockchain/indexers/utreexoproofindex.go | 2 +- server.go | 4 ++-- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index 32601352..1dbcd1da 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -317,7 +317,7 @@ func (idx *FlatUtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil. if err != nil { return err } - idx.FlushUtreexoStateIfNeeded() + idx.FlushUtreexoStateIfNeeded(block.Hash()) // Don't store proofs if the node is pruned. 
if idx.pruned { diff --git a/blockchain/indexers/manager.go b/blockchain/indexers/manager.go index 6166d147..3ea27758 100644 --- a/blockchain/indexers/manager.go +++ b/blockchain/indexers/manager.go @@ -472,12 +472,12 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) for _, indexer := range m.enabledIndexes { switch idxType := indexer.(type) { case *UtreexoProofIndex: - err := idxType.CloseUtreexoState() + err := idxType.CloseUtreexoState(block.Hash()) if err != nil { log.Errorf("Error while flushing utreexo state: %v", err) } case *FlatUtreexoProofIndex: - err := idxType.CloseUtreexoState() + err := idxType.CloseUtreexoState(block.Hash()) if err != nil { log.Errorf("Error while flushing utreexo state for flat utreexo proof index: %v", err) } @@ -523,12 +523,12 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) for _, indexer := range m.enabledIndexes { switch idxType := indexer.(type) { case *UtreexoProofIndex: - err := idxType.CloseUtreexoState() + err := idxType.CloseUtreexoState(block.Hash()) if err != nil { log.Errorf("Error while flushing utreexo state: %v", err) } case *FlatUtreexoProofIndex: - err := idxType.CloseUtreexoState() + err := idxType.CloseUtreexoState(block.Hash()) if err != nil { log.Errorf("Error while flushing utreexo state for flat utreexo proof index: %v", err) } diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 1a4ec809..cb77d27e 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -155,14 +155,14 @@ func (idx *FlatUtreexoProofIndex) FetchUtreexoState(blockHeight int32) ([]*chain } // FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. 
-func (idx *UtreexoProofIndex) FlushUtreexoStateIfNeeded() { +func (idx *UtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) { if idx.utreexoState.isFlushNeeded() { - idx.FlushUtreexoState() + idx.FlushUtreexoState(bestHash) } } // FlushUtreexoState saves the utreexo state to disk. -func (idx *UtreexoProofIndex) FlushUtreexoState() error { +func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error { basePath := utreexoBasePath(idx.utreexoState.config) if _, err := os.Stat(basePath); err != nil { os.MkdirAll(basePath, os.ModePerm) @@ -184,20 +184,20 @@ func (idx *UtreexoProofIndex) FlushUtreexoState() error { } // CloseUtreexoState flushes and closes the utreexo database state. -func (idx *UtreexoProofIndex) CloseUtreexoState() error { - idx.FlushUtreexoState() +func (idx *UtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { + idx.FlushUtreexoState(bestHash) return idx.utreexoState.closeDB() } // FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. -func (idx *FlatUtreexoProofIndex) FlushUtreexoStateIfNeeded() { +func (idx *FlatUtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) { if idx.utreexoState.isFlushNeeded() { - idx.FlushUtreexoState() + idx.FlushUtreexoState(bestHash) } } // FlushUtreexoState saves the utreexo state to disk. -func (idx *FlatUtreexoProofIndex) FlushUtreexoState() error { +func (idx *FlatUtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error { basePath := utreexoBasePath(idx.utreexoState.config) if _, err := os.Stat(basePath); err != nil { os.MkdirAll(basePath, os.ModePerm) @@ -219,8 +219,8 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoState() error { } // CloseUtreexoState flushes and closes the utreexo database state. 
-func (idx *FlatUtreexoProofIndex) CloseUtreexoState() error { - idx.FlushUtreexoState() +func (idx *FlatUtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { + idx.FlushUtreexoState(bestHash) return idx.utreexoState.closeDB() } diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 955bebfa..5ecc990f 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -299,7 +299,7 @@ func (idx *UtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil.Bloc if err != nil { return err } - idx.FlushUtreexoStateIfNeeded() + idx.FlushUtreexoStateIfNeeded(block.Hash()) return nil } diff --git a/server.go b/server.go index cccdde27..b83a5b7e 100644 --- a/server.go +++ b/server.go @@ -2574,7 +2574,7 @@ out: // If utreexoProofIndex option is on, flush it after closing down syncManager. if s.utreexoProofIndex != nil { - err := s.utreexoProofIndex.CloseUtreexoState() + err := s.utreexoProofIndex.CloseUtreexoState(&s.chain.BestSnapshot().Hash) if err != nil { btcdLog.Errorf("Error while flushing utreexo state: %v", err) } @@ -2582,7 +2582,7 @@ out: // If flatUtreexoProofIndex option is on, flush it after closing down syncManager. 
if s.flatUtreexoProofIndex != nil { - err := s.flatUtreexoProofIndex.CloseUtreexoState() + err := s.flatUtreexoProofIndex.CloseUtreexoState(&s.chain.BestSnapshot().Hash) if err != nil { btcdLog.Errorf("Error while flushing utreexo state: %v", err) } From bd28aca74a4f77e47757cdc829cd0f755f22bde9 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 15:43:48 +0900 Subject: [PATCH 20/50] indexers: return error on FlushUtreexoStateIfNeeded --- blockchain/indexers/flatutreexoproofindex.go | 5 ++++- blockchain/indexers/utreexobackend.go | 10 ++++++---- blockchain/indexers/utreexoproofindex.go | 5 ++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index 1dbcd1da..e9f4f978 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -317,7 +317,10 @@ func (idx *FlatUtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil. if err != nil { return err } - idx.FlushUtreexoStateIfNeeded(block.Hash()) + err = idx.FlushUtreexoStateIfNeeded(block.Hash()) + if err != nil { + log.Warnf("error while flushing the utreexo state. %v", err) + } // Don't store proofs if the node is pruned. if idx.pruned { diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index cb77d27e..cbaea40b 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -155,10 +155,11 @@ func (idx *FlatUtreexoProofIndex) FetchUtreexoState(blockHeight int32) ([]*chain } // FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. 
-func (idx *UtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) { +func (idx *UtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) error { if idx.utreexoState.isFlushNeeded() { - idx.FlushUtreexoState(bestHash) + return idx.FlushUtreexoState(bestHash) } + return nil } // FlushUtreexoState saves the utreexo state to disk. @@ -190,10 +191,11 @@ func (idx *UtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error } // FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. -func (idx *FlatUtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) { +func (idx *FlatUtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) error { if idx.utreexoState.isFlushNeeded() { - idx.FlushUtreexoState(bestHash) + return idx.FlushUtreexoState(bestHash) } + return nil } // FlushUtreexoState saves the utreexo state to disk. diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 5ecc990f..678054d9 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -299,7 +299,10 @@ func (idx *UtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil.Bloc if err != nil { return err } - idx.FlushUtreexoStateIfNeeded(block.Hash()) + err = idx.FlushUtreexoStateIfNeeded(block.Hash()) + if err != nil { + log.Warnf("error while flushing the utreexo state. 
%v", err) + } return nil } From de474901bd417df440403fbcc8289ef0a9c1e7dc Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 16:02:59 +0900 Subject: [PATCH 21/50] indexers: log the returned error on CloseUtreexoState --- blockchain/indexers/utreexobackend.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index cbaea40b..ecf34dce 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -186,7 +186,10 @@ func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error // CloseUtreexoState flushes and closes the utreexo database state. func (idx *UtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { - idx.FlushUtreexoState(bestHash) + err := idx.FlushUtreexoState(bestHash) + if err != nil { + log.Warnf("error whiling flushing the utreexo state. %v", err) + } return idx.utreexoState.closeDB() } @@ -222,7 +225,10 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) er // CloseUtreexoState flushes and closes the utreexo database state. func (idx *FlatUtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { - idx.FlushUtreexoState(bestHash) + err := idx.FlushUtreexoState(bestHash) + if err != nil { + log.Warnf("error whiling flushing the utreexo state. %v", err) + } return idx.utreexoState.closeDB() } From 37c995f8901c08274bd1f1b61dc785307f87e86c Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 16:07:42 +0900 Subject: [PATCH 22/50] indexers: expose utreexo state db Utreexo state db is exposed by including it in the UtreexoState during initialization. This allows for functions on UtreexoState to create leveldb transactions which allow for it to write to the database. 
--- blockchain/indexers/utreexobackend.go | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index ecf34dce..29250906 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -44,12 +44,12 @@ type UtreexoConfig struct { // UtreexoState is a wrapper around the raw accumulator with configuration // information. It contains the entire, non-pruned accumulator. type UtreexoState struct { - config *UtreexoConfig - state utreexo.Utreexo + config *UtreexoConfig + state utreexo.Utreexo + utreexoStateDB *leveldb.DB isFlushNeeded func() bool flush func() - closeDB func() error } // utreexoBasePath returns the base path of where the utreexo state should be @@ -190,7 +190,7 @@ func (idx *UtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error if err != nil { log.Warnf("error whiling flushing the utreexo state. %v", err) } - return idx.utreexoState.closeDB() + return idx.utreexoState.utreexoStateDB.Close() } // FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. @@ -229,7 +229,7 @@ func (idx *FlatUtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) er if err != nil { log.Warnf("error whiling flushing the utreexo state. 
%v", err) } - return idx.utreexoState.closeDB() + return idx.utreexoState.utreexoStateDB.Close() } // serializeUndoBlock serializes all the data that's needed for undoing a full utreexo state @@ -383,7 +383,6 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) p.NumLeaves = binary.LittleEndian.Uint64(buf[:]) } - var closeDB func() error var flush func() var isFlushNeeded func() bool if maxMemoryUsage >= 0 { @@ -420,9 +419,6 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) leavesNeedsFlush := cachedLeavesDB.IsFlushNeeded() return nodesNeedsFlush && leavesNeedsFlush } - closeDB = func() error { - return db.Close() - } } else { log.Infof("loading the utreexo state from disk...") err = nodesDB.ForEach(func(k uint64, v utreexo.Leaf) error { @@ -477,17 +473,14 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) log.Infof("Finished flushing the utreexo state to disk.") } - closeDB = func() error { - return db.Close() - } } uState := &UtreexoState{ - config: cfg, - state: &p, - isFlushNeeded: isFlushNeeded, - flush: flush, - closeDB: closeDB, + config: cfg, + state: &p, + utreexoStateDB: db, + isFlushNeeded: isFlushNeeded, + flush: flush, } return uState, err From ff591e3411627a9991c7d19ae0beeae57b75998b Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 17:06:50 +0900 Subject: [PATCH 23/50] blockchain, utreexobackends: return error on ForEach The functions that are passed into ForEach now are required to have errors returned and ForEach will return early if there's any errors. This is desirable as on flushes ForEach is called and thus can return early with an error on flushes. 
--- .../utreexobackends/cachedleavesmap.go | 14 ++++-- .../internal/utreexobackends/nodesmap.go | 14 ++++-- blockchain/utreexoio.go | 50 +++++++++++-------- 3 files changed, 52 insertions(+), 26 deletions(-) diff --git a/blockchain/internal/utreexobackends/cachedleavesmap.go b/blockchain/internal/utreexobackends/cachedleavesmap.go index 6641e8ec..a8064e1a 100644 --- a/blockchain/internal/utreexobackends/cachedleavesmap.go +++ b/blockchain/internal/utreexobackends/cachedleavesmap.go @@ -176,21 +176,29 @@ func (ms *CachedLeavesMapSlice) ClearMaps() { // ForEach loops through all the elements in the cachedleaves map slice and calls fn with the key-value pairs. // // This function is safe for concurrent access. -func (ms *CachedLeavesMapSlice) ForEach(fn func(utreexo.Hash, uint64)) { +func (ms *CachedLeavesMapSlice) ForEach(fn func(utreexo.Hash, uint64) error) error { ms.mtx.Lock() defer ms.mtx.Unlock() for _, m := range ms.maps { for k, v := range m { - fn(k, v) + err := fn(k, v) + if err != nil { + return err + } } } if len(ms.overflow) > 0 { for k, v := range ms.overflow { - fn(k, v) + err := fn(k, v) + if err != nil { + return err + } } } + + return nil } // createMaps creates a slice of maps and returns the total count that the maps diff --git a/blockchain/internal/utreexobackends/nodesmap.go b/blockchain/internal/utreexobackends/nodesmap.go index 461970dc..a5f16bda 100644 --- a/blockchain/internal/utreexobackends/nodesmap.go +++ b/blockchain/internal/utreexobackends/nodesmap.go @@ -213,21 +213,29 @@ func (ms *NodesMapSlice) ClearMaps() { // ForEach loops through all the elements in the nodes map slice and calls fn with the key-value pairs. // // This function is safe for concurrent access. 
-func (ms *NodesMapSlice) ForEach(fn func(uint64, CachedLeaf)) { +func (ms *NodesMapSlice) ForEach(fn func(uint64, CachedLeaf) error) error { ms.mtx.Lock() defer ms.mtx.Unlock() for _, m := range ms.maps { for k, v := range m { - fn(k, v) + err := fn(k, v) + if err != nil { + return err + } } } if len(ms.overflow) > 0 { for k, v := range ms.overflow { - fn(k, v) + err := fn(k, v) + if err != nil { + return err + } } } + + return nil } // createMaps creates a slice of maps and returns the total count that the maps diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 1e67bf16..68e7eb67 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -170,12 +170,14 @@ func (m *NodesBackEnd) Delete(k uint64) { // Length returns the amount of items in the underlying database. func (m *NodesBackEnd) Length() int { length := 0 - m.cache.ForEach(func(u uint64, cl utreexobackends.CachedLeaf) { + m.cache.ForEach(func(u uint64, cl utreexobackends.CachedLeaf) error { // Only count the entry if it's not removed and it's not already // in the database. if !cl.IsRemoved() && cl.IsFresh() { length++ } + + return nil }) iter := m.db.NewIterator(nil, nil) @@ -200,12 +202,14 @@ func (m *NodesBackEnd) Length() int { // ForEach calls the given function for each of the elements in the underlying map. func (m *NodesBackEnd) ForEach(fn func(uint64, utreexo.Leaf) error) error { - m.cache.ForEach(func(u uint64, cl utreexobackends.CachedLeaf) { + m.cache.ForEach(func(u uint64, cl utreexobackends.CachedLeaf) error { // Only operate on the entry if it's not removed and it's not already // in the database. if !cl.IsRemoved() && cl.IsFresh() { fn(u, cl.Leaf) } + + return nil }) iter := m.db.NewIterator(nil, nil) @@ -251,26 +255,28 @@ func (m *NodesBackEnd) UsageStats() (int64, int64) { } // flush saves all the cached entries to disk and resets the cache map. 
-func (m *NodesBackEnd) Flush(ldbTx *leveldb.Transaction) { - m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) { +func (m *NodesBackEnd) Flush(ldbTx *leveldb.Transaction) error { + err := m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) error { if v.IsRemoved() { err := NodesBackendDelete(ldbTx, k) if err != nil { - ldbTx.Discard() - log.Warnf("NodesBackEnd flush error. %v", err) - return + return err } } else if v.IsFresh() || v.IsModified() { err := NodesBackendPut(ldbTx, k, v.Leaf) if err != nil { - ldbTx.Discard() - log.Warnf("NodesBackEnd flush error. %v", err) - return + return err } } + + return nil }) + if err != nil { + return fmt.Errorf("NodesBackEnd flush error. %v", err) + } m.cache.ClearMaps() + return nil } var _ utreexo.CachedLeavesInterface = (*CachedLeavesBackEnd)(nil) @@ -339,7 +345,7 @@ func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { // Length returns the amount of items in the underlying db and the cache. func (m *CachedLeavesBackEnd) Length() int { length := 0 - m.cache.ForEach(func(k utreexo.Hash, v uint64) { + m.cache.ForEach(func(k utreexo.Hash, v uint64) error { // Only operate on the entry if it's not removed and it's not already // in the database. if v != math.MaxUint64 { @@ -348,6 +354,7 @@ func (m *CachedLeavesBackEnd) Length() int { length++ } } + return nil }) iter := m.db.NewIterator(nil, nil) for iter.Next() { @@ -372,7 +379,7 @@ func (m *CachedLeavesBackEnd) Length() int { // ForEach calls the given function for each of the elements in the underlying map. func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error { - m.cache.ForEach(func(k utreexo.Hash, v uint64) { + m.cache.ForEach(func(k utreexo.Hash, v uint64) error { // Only operate on the entry if it's not removed and it's not already // in the database. 
if v != math.MaxUint64 { @@ -381,6 +388,7 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error fn(k, v) } } + return nil }) iter := m.db.NewIterator(nil, nil) for iter.Next() { @@ -419,24 +427,26 @@ func (m *CachedLeavesBackEnd) UsageStats() (int64, int64) { } // Flush resets the cache and saves all the key values onto the database. -func (m *CachedLeavesBackEnd) Flush(ldbTx *leveldb.Transaction) { - m.cache.ForEach(func(k utreexo.Hash, v uint64) { +func (m *CachedLeavesBackEnd) Flush(ldbTx *leveldb.Transaction) error { + err := m.cache.ForEach(func(k utreexo.Hash, v uint64) error { if v == math.MaxUint64 { err := ldbTx.Delete(k[:], nil) if err != nil { - ldbTx.Discard() - log.Warnf("CachedLeavesBackEnd delete fail. %v", err) - return + return err } } else { err := CachedLeavesBackendPut(ldbTx, k, v) if err != nil { - ldbTx.Discard() - log.Warnf("CachedLeavesBackEnd put fail. %v", err) - return + return err } } + + return nil }) + if err != nil { + return fmt.Errorf("CachedLeavesBackEnd flush error. %v", err) + } m.cache.ClearMaps() + return nil } From 12ecdf27a1b19d1ecf137b75b87bab877c701c88 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 17:23:06 +0900 Subject: [PATCH 24/50] indexers: add isFlushNeeded for in memory utreexo index It'll never really be called but not having it risks runtime panics. Just adding it in case it'll be called with later code changes. --- blockchain/indexers/utreexobackend.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 29250906..21e4d83e 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -473,6 +473,11 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) log.Infof("Finished flushing the utreexo state to disk.") } + + // Flush is never needed since we're keeping everything in memory.
+ isFlushNeeded = func() bool { + return false + } } uState := &UtreexoState{ From da885d55ebc534568eef3ff2c94c5c9776aa6c41 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 10 Jul 2024 17:37:54 +0900 Subject: [PATCH 25/50] indexers: ask for leveldb.Transaction on flush Flushes now return errors and ask for leveldb.Transaction from the caller. This is so that the caller can write the best block hash and the utreexo state numleaves on flushes to mark that the state is consistent up until that point. --- blockchain/indexers/utreexobackend.go | 82 ++++++++++++++++----------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 21e4d83e..5219d60e 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -49,7 +49,7 @@ type UtreexoState struct { utreexoStateDB *leveldb.DB isFlushNeeded func() bool - flush func() + flush func(ldbTx *leveldb.Transaction) error } // utreexoBasePath returns the base path of where the utreexo state should be @@ -180,7 +180,23 @@ func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error return err } - idx.utreexoState.flush() + ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() + if err != nil { + return err + } + + err = idx.utreexoState.flush(ldbTx) + if err != nil { + ldbTx.Discard() + return err + } + + err = ldbTx.Commit() + if err != nil { + ldbTx.Discard() + return err + } + return nil } @@ -219,7 +235,23 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) er return err } - idx.utreexoState.flush() + ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() + if err != nil { + return err + } + + err = idx.utreexoState.flush(ldbTx) + if err != nil { + ldbTx.Discard() + return err + } + + err = ldbTx.Commit() + if err != nil { + ldbTx.Discard() + return err + } + return nil } @@ -383,17 +415,13 @@ func initUtreexoState(cfg 
*UtreexoConfig, maxMemoryUsage int64, basePath string) p.NumLeaves = binary.LittleEndian.Uint64(buf[:]) } - var flush func() + var flush func(ldbTx *leveldb.Transaction) error var isFlushNeeded func() bool if maxMemoryUsage >= 0 { p.Nodes = nodesDB p.CachedLeaves = cachedLeavesDB - flush = func() { + flush = func(ldbTx *leveldb.Transaction) error { log.Infof("Flushing the utreexo state to disk...") - ldbTx, err := db.OpenTransaction() - if err != nil { - log.Warnf("error while opening transaction. %v", err) - } nodesUsed, nodesCapacity := nodesDB.UsageStats() log.Debugf("Utreexo index nodesDB cache usage: %d/%d (%v%%)\n", @@ -405,14 +433,18 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) cachedLeavesUsed, cachedLeavesCapacity, float64(cachedLeavesUsed)/float64(cachedLeavesCapacity)) - nodesDB.Flush(ldbTx) - cachedLeavesDB.Flush(ldbTx) - - err = ldbTx.Commit() + err = nodesDB.Flush(ldbTx) + if err != nil { + return err + } + err = cachedLeavesDB.Flush(ldbTx) if err != nil { - log.Warnf("error while committing transaction. %v", err) + return err } + log.Infof("Finished flushing the utreexo state to disk.") + + return nil } isFlushNeeded = func() bool { nodesNeedsFlush := nodesDB.IsFlushNeeded() @@ -439,39 +471,25 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) log.Infof("Finished loading the utreexo state from disk.") - flush = func() { + flush = func(ldbTx *leveldb.Transaction) error { log.Infof("Flushing the utreexo state to disk. May take a while...") - ldbTx, err := db.OpenTransaction() - if err != nil { - log.Warnf("flush error, failed to open leveldb tx. %v", err) - return - } err = p.Nodes.ForEach(func(k uint64, v utreexo.Leaf) error { return blockchain.NodesBackendPut(ldbTx, k, v) }) if err != nil { - ldbTx.Discard() - log.Warnf("flush error. 
%v", err) - return + return err } err = p.CachedLeaves.ForEach(func(k utreexo.Hash, v uint64) error { return blockchain.CachedLeavesBackendPut(ldbTx, k, v) }) if err != nil { - ldbTx.Discard() - log.Warnf("flush error. %v", err) - return - } - - err = ldbTx.Commit() - if err != nil { - log.Warnf("flush error, failed to commit leveldb tx. %v", err) - return + return err } log.Infof("Finished flushing the utreexo state to disk.") + return nil } // Flush is never needed since we're keeping everything in memory. From 90d52e0f27a9e610ec63ed48a6666c65dd9f112c Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Thu, 11 Jul 2024 15:36:42 +0900 Subject: [PATCH 26/50] indexers: write consistency state on flushes When the utreexo state is flushed to disk, the best block hash and the numleaves are also written. This is all done in one leveldb transaction and so it's guaranteed to be atomic. The best block hash that's written allows for callers to check if the utreexo state is consistent or not with the blockindex. --- blockchain/indexers/utreexobackend.go | 101 ++++++++++++--------- blockchain/indexers/utreexobackend_test.go | 55 +++++++++++ 2 files changed, 111 insertions(+), 45 deletions(-) create mode 100644 blockchain/indexers/utreexobackend_test.go diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 5219d60e..d1d9790c 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -25,6 +25,12 @@ const ( defaultUtreexoFileName = "forest.dat" ) +var ( + // utreexoStateConsistencyKeyName is name of the db key used to store the consistency + // state for the utreexo accumulator state. + utreexoStateConsistencyKeyName = []byte("utreexostateconsistency") +) + // UtreexoConfig is a descriptor which specifies the Utreexo state instance configuration. type UtreexoConfig struct { // DataDir is the base path of where all the data for this node will be stored. 
@@ -92,6 +98,36 @@ func checkUtreexoExists(cfg *UtreexoConfig, basePath string) bool { return true } +// dbWriteUtreexoStateConsistency writes the consistency state to the database using the given transaction. +func dbWriteUtreexoStateConsistency(ldbTx *leveldb.Transaction, bestHash *chainhash.Hash, numLeaves uint64) error { + // Create the byte slice to be written. + var buf [8 + chainhash.HashSize]byte + binary.LittleEndian.PutUint64(buf[:8], numLeaves) + copy(buf[8:], bestHash[:]) + + return ldbTx.Put(utreexoStateConsistencyKeyName, buf[:], nil) +} + +// dbFetchUtreexoStateConsistency returns the stored besthash and the numleaves in the database. +func dbFetchUtreexoStateConsistency(db *leveldb.DB) (*chainhash.Hash, uint64, error) { + buf, err := db.Get(utreexoStateConsistencyKeyName, nil) + if err != nil && err != leveldb.ErrNotFound { + return nil, 0, err + } + // Set error to nil as the error may have been ErrNotFound. + err = nil + if buf == nil { + return nil, 0, nil + } + + bestHash, err := chainhash.NewHash(buf[8:]) + if err != nil { + return nil, 0, err + } + + return bestHash, binary.LittleEndian.Uint64(buf[:8]), nil +} + // FetchCurrentUtreexoState returns the current utreexo state. func (idx *UtreexoProofIndex) FetchCurrentUtreexoState() ([]*chainhash.Hash, uint64) { idx.mtx.RLock() @@ -164,23 +200,18 @@ func (idx *UtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash // FlushUtreexoState saves the utreexo state to disk. 
func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error { - basePath := utreexoBasePath(idx.utreexoState.config) - if _, err := os.Stat(basePath); err != nil { - os.MkdirAll(basePath, os.ModePerm) - } - forestFilePath := filepath.Join(basePath, defaultUtreexoFileName) - forestFile, err := os.OpenFile(forestFilePath, os.O_RDWR|os.O_CREATE, 0666) - if err != nil { - return err - } - var buf [8]byte - binary.LittleEndian.PutUint64(buf[:], idx.utreexoState.state.GetNumLeaves()) - _, err = forestFile.Write(buf[:]) + idx.mtx.Lock() + defer idx.mtx.Unlock() + + log.Infof("Flushing the utreexo state to disk...") + + ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() if err != nil { return err } - ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() + // Write the best block hash and the numleaves for the utreexo state. + err = dbWriteUtreexoStateConsistency(ldbTx, bestHash, idx.utreexoState.state.GetNumLeaves()) if err != nil { return err } @@ -197,6 +228,8 @@ func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error return err } + log.Infof("Finished flushing the utreexo state to disk.") + return nil } @@ -219,23 +252,16 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash. // FlushUtreexoState saves the utreexo state to disk. 
func (idx *FlatUtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error { - basePath := utreexoBasePath(idx.utreexoState.config) - if _, err := os.Stat(basePath); err != nil { - os.MkdirAll(basePath, os.ModePerm) - } - forestFilePath := filepath.Join(basePath, defaultUtreexoFileName) - forestFile, err := os.OpenFile(forestFilePath, os.O_RDWR|os.O_CREATE, 0666) - if err != nil { - return err - } - var buf [8]byte - binary.LittleEndian.PutUint64(buf[:], idx.utreexoState.state.GetNumLeaves()) - _, err = forestFile.Write(buf[:]) + idx.mtx.Lock() + defer idx.mtx.Unlock() + + ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() if err != nil { return err } - ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() + // Write the best block hash and the numleaves for the utreexo state. + err = dbWriteUtreexoStateConsistency(ldbTx, bestHash, idx.utreexoState.state.GetNumLeaves()) if err != nil { return err } @@ -401,19 +427,11 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) return nil, err } - if checkUtreexoExists(cfg, basePath) { - forestFilePath := filepath.Join(basePath, defaultUtreexoFileName) - file, err := os.OpenFile(forestFilePath, os.O_RDWR, 0400) - if err != nil { - return nil, err - } - var buf [8]byte - _, err = file.Read(buf[:]) - if err != nil { - return nil, err - } - p.NumLeaves = binary.LittleEndian.Uint64(buf[:]) + _, numLeaves, err := dbFetchUtreexoStateConsistency(db) + if err != nil { + return nil, err } + p.NumLeaves = numLeaves var flush func(ldbTx *leveldb.Transaction) error var isFlushNeeded func() bool @@ -421,8 +439,6 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) p.Nodes = nodesDB p.CachedLeaves = cachedLeavesDB flush = func(ldbTx *leveldb.Transaction) error { - log.Infof("Flushing the utreexo state to disk...") - nodesUsed, nodesCapacity := nodesDB.UsageStats() log.Debugf("Utreexo index nodesDB cache usage: %d/%d (%v%%)\n", nodesUsed, 
nodesCapacity, @@ -442,8 +458,6 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) return err } - log.Infof("Finished flushing the utreexo state to disk.") - return nil } isFlushNeeded = func() bool { @@ -472,8 +486,6 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) log.Infof("Finished loading the utreexo state from disk.") flush = func(ldbTx *leveldb.Transaction) error { - log.Infof("Flushing the utreexo state to disk. May take a while...") - err = p.Nodes.ForEach(func(k uint64, v utreexo.Leaf) error { return blockchain.NodesBackendPut(ldbTx, k, v) }) @@ -488,7 +500,6 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) return err } - log.Infof("Finished flushing the utreexo state to disk.") return nil } diff --git a/blockchain/indexers/utreexobackend_test.go b/blockchain/indexers/utreexobackend_test.go new file mode 100644 index 00000000..16f2130a --- /dev/null +++ b/blockchain/indexers/utreexobackend_test.go @@ -0,0 +1,55 @@ +// Copyright (c) 2024 The utreexo developers +// Use of this source code is governed by an ISC +// license that can be found in the LICENSE file. + +package indexers + +import ( + "math/rand" + "os" + "testing" + + "github.com/syndtr/goleveldb/leveldb" + "github.com/utreexo/utreexod/chaincfg" +) + +func TestUtreexoStateConsistencyWrite(t *testing.T) { + dbPath := t.TempDir() + db, err := leveldb.OpenFile(dbPath, nil) + if err != nil { + t.Fatal(err) + } + defer func() { os.RemoveAll(dbPath) }() + + // Values to write. + numLeaves := rand.Uint64() + hash := chaincfg.MainNetParams.GenesisHash + + // Write the consistency state. + ldbTx, err := db.OpenTransaction() + if err != nil { + t.Fatal(err) + } + err = dbWriteUtreexoStateConsistency(ldbTx, hash, numLeaves) + if err != nil { + t.Fatal(err) + } + err = ldbTx.Commit() + if err != nil { + t.Fatal(err) + } + + // Fetch the consistency state. 
+ gotHash, gotNumLeaves, err := dbFetchUtreexoStateConsistency(db) + if err != nil { + t.Fatal(err) + } + + // Compare. + if *hash != *gotHash { + t.Fatalf("expected %v, got %v", hash.String(), gotHash.String()) + } + if numLeaves != gotNumLeaves { + t.Fatalf("expected %v, got %v", numLeaves, gotNumLeaves) + } +} From 8897ad4cc8c6d414130f7fbe09e36fd7c309e52e Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Thu, 11 Jul 2024 16:09:37 +0900 Subject: [PATCH 27/50] indexers: refactor utreexo config Pruned and max memory is moved to the utreexo config and init functions now take in just the utreexo config as all the needed information is stored in the config. --- blockchain/indexers/flatutreexoproofindex.go | 44 ++++++++++---------- blockchain/indexers/indexers_test.go | 8 ++-- blockchain/indexers/utreexobackend.go | 44 ++++++++++---------- blockchain/indexers/utreexoproofindex.go | 44 ++++++++++---------- 4 files changed, 68 insertions(+), 72 deletions(-) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index e9f4f978..cc47b169 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -87,11 +87,9 @@ type FlatUtreexoProofIndex struct { rememberIdxState FlatFileState proofStatsState FlatFileState rootsState FlatFileState - chainParams *chaincfg.Params - dataDir string - // True if the node is pruned. - pruned bool + // All the configurable metadata. + config *UtreexoConfig // The blockchain instance the index corresponds to. chain *blockchain.BlockChain @@ -124,18 +122,18 @@ func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain) error { // // If the node is pruned, then we need to check if it started off as // a pruned node or if the user switch to being a pruned node. 
- if !idx.pruned { + if !idx.config.Pruned { return nil } - proofPath := flatFilePath(idx.dataDir, flatUtreexoProofName) + proofPath := flatFilePath(idx.config.DataDir, flatUtreexoProofName) _, err := os.Stat(proofPath) if err != nil { // If the error isn't nil, that means the proofpath // doesn't exist. return nil } - proofState, err := loadFlatFileState(idx.dataDir, flatUtreexoProofName) + proofState, err := loadFlatFileState(idx.config.DataDir, flatUtreexoProofName) if err != nil { return err } @@ -212,7 +210,7 @@ func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain) error { } // Delete proof stat file since it's not relevant to a pruned bridge node. - proofStatPath := flatFilePath(idx.dataDir, flatUtreexoProofStatsName) + proofStatPath := flatFilePath(idx.config.DataDir, flatUtreexoProofStatsName) err = deleteFlatFile(proofStatPath) if err != nil { return err @@ -287,7 +285,7 @@ func (idx *FlatUtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil. // undo block in order to undo a block on reorgs. If we have all the // proofs block by block, that data can be used for reorgs but these // two modes will not have the proofs available. - if idx.pruned || idx.proofGenInterVal != 1 { + if idx.config.Pruned || idx.proofGenInterVal != 1 { err = idx.storeUndoBlock(block.Height(), uint64(len(adds)), ud.AccProof.Targets, delHashes) if err != nil { @@ -323,7 +321,7 @@ func (idx *FlatUtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil. } // Don't store proofs if the node is pruned. 
- if idx.pruned { + if idx.config.Pruned { return nil } @@ -719,7 +717,7 @@ func (idx *FlatUtreexoProofIndex) getUndoData(block *btcutil.Block) (uint64, []u delHashes []utreexo.Hash ) - if !idx.pruned || idx.proofGenInterVal != 1 { + if !idx.config.Pruned || idx.proofGenInterVal != 1 { ud, err := idx.FetchUtreexoProof(block.Height(), false) if err != nil { return 0, nil, nil, err @@ -774,7 +772,7 @@ func (idx *FlatUtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcut // Check if we're at a height where proof was generated. Only check if we're not // pruned as we don't keep the historical proofs as a pruned node. - if (block.Height()%idx.proofGenInterVal) == 0 && !idx.pruned { + if (block.Height()%idx.proofGenInterVal) == 0 && !idx.config.Pruned { height := block.Height() / idx.proofGenInterVal err = idx.proofState.DisconnectBlock(height) if err != nil { @@ -811,7 +809,7 @@ func (idx *FlatUtreexoProofIndex) FetchUtreexoProof(height int32, excludeAccProo return nil, fmt.Errorf("No Utreexo Proof for height %d", height) } - if idx.pruned { + if idx.config.Pruned { return nil, fmt.Errorf("Cannot fetch historical proof as the node is pruned") } @@ -900,7 +898,7 @@ func (idx *FlatUtreexoProofIndex) FetchMultiUtreexoProof(height int32) ( return nil, nil, nil, fmt.Errorf("No Utreexo Proof for height %d", height) } - if idx.pruned { + if idx.config.Pruned { return nil, nil, nil, fmt.Errorf("Cannot fetch historical proof as the node is pruned") } @@ -1269,25 +1267,25 @@ func NewFlatUtreexoProofIndex(pruned bool, chainParams *chaincfg.Params, idx := &FlatUtreexoProofIndex{ proofGenInterVal: intervalToUse, - chainParams: chainParams, mtx: new(sync.RWMutex), - dataDir: dataDir, + config: &UtreexoConfig{ + MaxMemoryUsage: maxMemoryUsage, + Params: chainParams, + Pruned: pruned, + DataDir: dataDir, + Name: flatUtreexoProofIndexType, + }, } // Init Utreexo State. 
- uState, err := InitUtreexoState(&UtreexoConfig{ - DataDir: dataDir, - Name: flatUtreexoProofIndexType, - Params: chainParams, - }, maxMemoryUsage) + uState, err := InitUtreexoState(idx.config) if err != nil { return nil, err } idx.utreexoState = uState - idx.pruned = pruned // Init the utreexo proof state if the node isn't pruned. - if !idx.pruned { + if !idx.config.Pruned { proofState, err := loadFlatFileState(dataDir, flatUtreexoProofName) if err != nil { return nil, err diff --git a/blockchain/indexers/indexers_test.go b/blockchain/indexers/indexers_test.go index a9695294..0a127ce4 100644 --- a/blockchain/indexers/indexers_test.go +++ b/blockchain/indexers/indexers_test.go @@ -203,7 +203,7 @@ func compareUtreexoIdx(start, end int32, pruned bool, chain *blockchain.BlockCha return err } - if !idxType.pruned { + if !idxType.config.Pruned { utreexoUD, err = idxType.FetchUtreexoProof(block.Hash()) if err != nil { return err @@ -222,7 +222,7 @@ func compareUtreexoIdx(start, end int32, pruned bool, chain *blockchain.BlockCha case *FlatUtreexoProofIndex: var err error - if !idxType.pruned { + if !idxType.config.Pruned { flatUD, err = idxType.FetchUtreexoProof(b, false) if err != nil { return err @@ -1033,7 +1033,7 @@ func TestBridgeNodePruneUndoDataGen(t *testing.T) { t.Fatal(err) } } - idxType.pruned = true + idxType.config.Pruned = true case *UtreexoProofIndex: for height := int32(1); height <= maxHeight; height++ { @@ -1047,7 +1047,7 @@ func TestBridgeNodePruneUndoDataGen(t *testing.T) { t.Fatal(err) } } - idxType.pruned = true + idxType.config.Pruned = true } } diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index d1d9790c..145bc1b1 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -33,6 +33,16 @@ var ( // UtreexoConfig is a descriptor which specifies the Utreexo state instance configuration. 
type UtreexoConfig struct { + // MaxMemoryUsage is the desired memory usage for the utreexo state cache. + MaxMemoryUsage int64 + + // Params are the Bitcoin network parameters. This is used to separately store + // different accumulators. + Params *chaincfg.Params + + // If the node is a pruned node or not. + Pruned bool + // DataDir is the base path of where all the data for this node will be stored. // Utreexo has custom storage method and that data will be stored under this // directory. @@ -41,10 +51,6 @@ type UtreexoConfig struct { // Name is what the type of utreexo proof indexer this utreexo state is related // to. Name string - - // Params are the Bitcoin network parameters. This is used to separately store - // different accumulators. - Params *chaincfg.Params } // UtreexoState is a wrapper around the raw accumulator with configuration @@ -64,17 +70,6 @@ func utreexoBasePath(cfg *UtreexoConfig) string { return filepath.Join(cfg.DataDir, utreexoDirName+"_"+cfg.Name) } -// InitUtreexoState returns an initialized utreexo state. If there isn't an -// existing state on disk, it creates one and returns it. -// maxMemoryUsage of 0 will keep every element on disk. A negaive maxMemoryUsage will -// load every element to the memory. -func InitUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64) (*UtreexoState, error) { - basePath := utreexoBasePath(cfg) - log.Infof("Initializing Utreexo state from '%s'", basePath) - defer log.Info("Utreexo state loaded") - return initUtreexoState(cfg, maxMemoryUsage, basePath) -} - // deleteUtreexoState removes the utreexo state directory and all the contents // in it. func deleteUtreexoState(path string) error { @@ -404,15 +399,20 @@ func deserializeUndoBlock(serialized []byte) (uint64, []uint64, []utreexo.Hash, return numAdds, targets, delHashes, nil } -// initUtreexoState creates a new utreexo state and returns it. 
maxMemoryUsage of 0 will keep -// every element on disk and a negative maxMemoryUsage will load all the elemnts to memory. -func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) (*UtreexoState, error) { +// InitUtreexoState returns an initialized utreexo state. If there isn't an +// existing state on disk, it creates one and returns it. +// A cfg.MaxMemoryUsage of 0 will keep every element on disk. A negative cfg.MaxMemoryUsage will +// load every element to the memory. +func InitUtreexoState(cfg *UtreexoConfig) (*UtreexoState, error) { + log.Infof("Initializing Utreexo state from '%s'", utreexoBasePath(cfg)) + defer log.Info("Utreexo state loaded") + p := utreexo.NewMapPollard(true) - maxNodesMem := maxMemoryUsage * 7 / 10 - maxCachedLeavesMem := maxMemoryUsage - maxNodesMem + maxNodesMem := cfg.MaxMemoryUsage * 7 / 10 + maxCachedLeavesMem := cfg.MaxMemoryUsage - maxNodesMem - db, err := leveldb.OpenFile(basePath, nil) + db, err := leveldb.OpenFile(utreexoBasePath(cfg), nil) if err != nil { return nil, err } @@ -435,7 +435,7 @@ func initUtreexoState(cfg *UtreexoConfig, maxMemoryUsage int64, basePath string) var flush func(ldbTx *leveldb.Transaction) error var isFlushNeeded func() bool - if maxMemoryUsage >= 0 { + if cfg.MaxMemoryUsage >= 0 { p.Nodes = nodesDB p.CachedLeaves = cachedLeavesDB flush = func(ldbTx *leveldb.Transaction) error { diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 678054d9..8cb20910 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -52,11 +52,8 @@ var _ NeedsInputser = (*UtreexoProofIndex)(nil) // UtreexoProofIndex implements a utreexo accumulator proof index for all the blocks. type UtreexoProofIndex struct { - db database.DB - chainParams *chaincfg.Params - - // If the node is a pruned node or not. - pruned bool + db database.DB + config *UtreexoConfig // The blockchain instance the index corresponds to. 
chain *blockchain.BlockChain @@ -83,7 +80,7 @@ func (idx *UtreexoProofIndex) Init(chain *blockchain.BlockChain) error { idx.chain = chain // Nothing else to do if the node is an archive node. - if !idx.pruned { + if !idx.config.Pruned { return nil } @@ -226,7 +223,7 @@ func (idx *UtreexoProofIndex) Create(dbTx database.Tx) error { } // Only create the undo bucket if the node is pruned. - if idx.pruned { + if idx.config.Pruned { _, err = utreexoParentBucket.CreateBucket(utreexoUndoKey) if err != nil { return err @@ -267,7 +264,7 @@ func (idx *UtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil.Bloc } // Only store the proofs if the node is not pruned. - if !idx.pruned { + if !idx.config.Pruned { err = dbStoreUtreexoProof(dbTx, block.Hash(), ud) if err != nil { return err @@ -285,7 +282,7 @@ func (idx *UtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil.Bloc } // For pruned nodes, the undo data is necessary for reorgs. - if idx.pruned { + if idx.config.Pruned { err = dbStoreUndoData(dbTx, uint64(len(adds)), ud.AccProof.Targets, block.Hash(), delHashes) if err != nil { @@ -316,7 +313,7 @@ func (idx *UtreexoProofIndex) getUndoData(dbTx database.Tx, block *btcutil.Block delHashes []utreexo.Hash ) - if !idx.pruned { + if !idx.config.Pruned { ud, err := idx.FetchUtreexoProof(block.Hash()) if err != nil { return 0, nil, nil, err @@ -374,7 +371,7 @@ func (idx *UtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.B return err } - if idx.pruned { + if idx.config.Pruned { err = dbDeleteUndoData(dbTx, block.Hash()) if err != nil { return err @@ -391,7 +388,7 @@ func (idx *UtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.B // FetchUtreexoProof returns the Utreexo proof data for the given block hash. 
func (idx *UtreexoProofIndex) FetchUtreexoProof(hash *chainhash.Hash) (*wire.UData, error) { - if idx.pruned { + if idx.config.Pruned { return nil, fmt.Errorf("Cannot fetch historical proof as the node is pruned") } @@ -577,21 +574,22 @@ func NewUtreexoProofIndex(db database.DB, pruned bool, maxMemoryUsage int64, chainParams *chaincfg.Params, dataDir string) (*UtreexoProofIndex, error) { idx := &UtreexoProofIndex{ - db: db, - chainParams: chainParams, - mtx: new(sync.RWMutex), - } - - uState, err := InitUtreexoState(&UtreexoConfig{ - DataDir: dataDir, - Name: db.Type(), - Params: chainParams, - }, maxMemoryUsage) + db: db, + mtx: new(sync.RWMutex), + config: &UtreexoConfig{ + MaxMemoryUsage: maxMemoryUsage, + Params: chainParams, + Pruned: pruned, + DataDir: dataDir, + Name: db.Type(), + }, + } + + uState, err := InitUtreexoState(idx.config) if err != nil { return nil, err } idx.utreexoState = uState - idx.pruned = pruned return idx, nil } From f5166e235207b7456eec1d0ab26919dec4a40843 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 12 Jul 2024 16:20:33 +0900 Subject: [PATCH 28/50] indexers: don't init utreexo state when creating a new index In order to support checking if the utreexo state is consistent, we need the best block hash, which can only be attained when the chain is initialized. The current initialization for the utreexo state was being done when the index was created. Moving the initialization to when the chain is being initialized allows us to pass in the best block hash. 
--- blockchain/indexers/flatutreexoproofindex.go | 16 ++++++------- blockchain/indexers/indexers_test.go | 25 +++++++++++++------- blockchain/indexers/utreexoproofindex.go | 15 ++++++------ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index cc47b169..fb40297a 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -118,6 +118,13 @@ func (idx *FlatUtreexoProofIndex) NeedsInputs() bool { func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain) error { idx.chain = chain + // Init Utreexo State. + uState, err := InitUtreexoState(idx.config) + if err != nil { + return err + } + idx.utreexoState = uState + // Nothing to do if the node is not pruned. // // If the node is pruned, then we need to check if it started off as @@ -127,7 +134,7 @@ func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain) error { } proofPath := flatFilePath(idx.config.DataDir, flatUtreexoProofName) - _, err := os.Stat(proofPath) + _, err = os.Stat(proofPath) if err != nil { // If the error isn't nil, that means the proofpath // doesn't exist. @@ -1277,13 +1284,6 @@ func NewFlatUtreexoProofIndex(pruned bool, chainParams *chaincfg.Params, }, } - // Init Utreexo State. - uState, err := InitUtreexoState(idx.config) - if err != nil { - return nil, err - } - idx.utreexoState = uState - // Init the utreexo proof state if the node isn't pruned. 
if !idx.config.Pruned { proofState, err := loadFlatFileState(dataDir, flatUtreexoProofName) diff --git a/blockchain/indexers/indexers_test.go b/blockchain/indexers/indexers_test.go index 0a127ce4..21d5183e 100644 --- a/blockchain/indexers/indexers_test.go +++ b/blockchain/indexers/indexers_test.go @@ -132,14 +132,6 @@ func indexersTestChain(testName string, proofGenInterval int32) (*blockchain.Blo panic(fmt.Errorf("failed to create chain instance: %v", err)) } - // Init the indexes. - err = indexManager.Init(chain, nil) - if err != nil { - tearDown() - os.RemoveAll(testDbRoot) - panic(fmt.Errorf("failed to init indexs: %v", err)) - } - return chain, indexes, &params, indexManager, tearDown } @@ -1051,6 +1043,23 @@ func TestBridgeNodePruneUndoDataGen(t *testing.T) { } } + // Close the databases so that they can be initialized again + // to generate the undo data. + bestHash := chain.BestSnapshot().Hash + for _, indexer := range indexes { + switch idxType := indexer.(type) { + case *FlatUtreexoProofIndex: + err = idxType.CloseUtreexoState(&bestHash) + if err != nil { + t.Fatal(err) + } + case *UtreexoProofIndex: + err = idxType.CloseUtreexoState(&bestHash) + if err != nil { + t.Fatal(err) + } + } + } // Here we generate the undo data and delete the proof files. err = indexManager.Init(chain, nil) if err != nil { diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 8cb20910..20ba77ba 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -79,6 +79,13 @@ func (idx *UtreexoProofIndex) NeedsInputs() bool { func (idx *UtreexoProofIndex) Init(chain *blockchain.BlockChain) error { idx.chain = chain + // Init Utreexo State. + uState, err := InitUtreexoState(idx.config) + if err != nil { + return err + } + idx.utreexoState = uState + // Nothing else to do if the node is an archive node. 
if !idx.config.Pruned { return nil @@ -86,7 +93,7 @@ func (idx *UtreexoProofIndex) Init(chain *blockchain.BlockChain) error { // Check if the utreexo undo bucket exists. var exists bool - err := idx.db.View(func(dbTx database.Tx) error { + err = idx.db.View(func(dbTx database.Tx) error { parentBucket := dbTx.Metadata().Bucket(utreexoParentBucketKey) bucket := parentBucket.Bucket(utreexoUndoKey) exists = bucket != nil @@ -585,12 +592,6 @@ func NewUtreexoProofIndex(db database.DB, pruned bool, maxMemoryUsage int64, }, } - uState, err := InitUtreexoState(idx.config) - if err != nil { - return nil, err - } - idx.utreexoState = uState - return idx, nil } From 12ae71b40edf4c5cfc2573c9ddac42295701322a Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 12 Jul 2024 17:41:45 +0900 Subject: [PATCH 29/50] indexers: require tip hash and height for index Init This change requires the indexers to take in the tip hash and height as an argument to Init(). With this, utreexo indexes are able to see what tip they're synced to. Since the accumulator isn't written to disk every block, this is useful information to check which block each utreexo index should sync to. --- blockchain/indexers/addrindex.go | 2 +- blockchain/indexers/cfindex.go | 2 +- blockchain/indexers/common.go | 2 +- blockchain/indexers/flatutreexoproofindex.go | 4 +++- blockchain/indexers/manager.go | 13 ++++++++++++- blockchain/indexers/ttlindex.go | 2 +- blockchain/indexers/txindex.go | 2 +- blockchain/indexers/utreexoproofindex.go | 4 +++- 8 files changed, 23 insertions(+), 8 deletions(-) diff --git a/blockchain/indexers/addrindex.go b/blockchain/indexers/addrindex.go index 154c091f..61ad1e98 100644 --- a/blockchain/indexers/addrindex.go +++ b/blockchain/indexers/addrindex.go @@ -645,7 +645,7 @@ func (idx *AddrIndex) NeedsInputs() bool { // initialize for this index. // // This is part of the Indexer interface. 
-func (idx *AddrIndex) Init(_ *blockchain.BlockChain) error { +func (idx *AddrIndex) Init(_ *blockchain.BlockChain, _ *chainhash.Hash, _ int32) error { // Nothing to do. return nil } diff --git a/blockchain/indexers/cfindex.go b/blockchain/indexers/cfindex.go index a1394ccd..681f6640 100644 --- a/blockchain/indexers/cfindex.go +++ b/blockchain/indexers/cfindex.go @@ -96,7 +96,7 @@ func (idx *CfIndex) NeedsInputs() bool { // Init initializes the hash-based cf index. This is part of the Indexer // interface. -func (idx *CfIndex) Init(_ *blockchain.BlockChain) error { +func (idx *CfIndex) Init(_ *blockchain.BlockChain, _ *chainhash.Hash, _ int32) error { return nil // Nothing to do. } diff --git a/blockchain/indexers/common.go b/blockchain/indexers/common.go index 6bc0106a..f830f5ce 100644 --- a/blockchain/indexers/common.go +++ b/blockchain/indexers/common.go @@ -49,7 +49,7 @@ type Indexer interface { // Init is invoked when the index manager is first initializing the // index. This differs from the Create method in that it is called on // every load, including the case the index was just created. - Init(*blockchain.BlockChain) error + Init(*blockchain.BlockChain, *chainhash.Hash, int32) error // ConnectBlock is invoked when a new block has been connected to the // main chain. The set of output spent within a block is also passed in diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index fb40297a..d07bbcb5 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -115,7 +115,9 @@ func (idx *FlatUtreexoProofIndex) NeedsInputs() bool { // Init initializes the flat utreexo proof index. This is part of the Indexer // interface. 
-func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain) error { +func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain, + tipHash *chainhash.Hash, tipHeight int32) error { + idx.chain = chain // Init Utreexo State. diff --git a/blockchain/indexers/manager.go b/blockchain/indexers/manager.go index 3ea27758..b9888bb8 100644 --- a/blockchain/indexers/manager.go +++ b/blockchain/indexers/manager.go @@ -311,7 +311,18 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) // Initialize each of the enabled indexes. for _, indexer := range m.enabledIndexes { - if err := indexer.Init(chain); err != nil { + // Fetch the current tip for the index. + var height int32 + var hash *chainhash.Hash + err := m.db.View(func(dbTx database.Tx) error { + idxKey := indexer.Key() + hash, height, err = dbFetchIndexerTip(dbTx, idxKey) + return err + }) + if err != nil { + return err + } + if err := indexer.Init(chain, hash, height); err != nil { return err } } diff --git a/blockchain/indexers/ttlindex.go b/blockchain/indexers/ttlindex.go index 06009ee1..ba35e091 100644 --- a/blockchain/indexers/ttlindex.go +++ b/blockchain/indexers/ttlindex.go @@ -41,7 +41,7 @@ func (idx *TTLIndex) NeedsInputs() bool { // Init initializes the time to live index. This is part of the Indexer // interface. -func (idx *TTLIndex) Init(_ *blockchain.BlockChain) error { +func (idx *TTLIndex) Init(_ *blockchain.BlockChain, _ *chainhash.Hash, _ int32) error { return nil // Nothing to do. } diff --git a/blockchain/indexers/txindex.go b/blockchain/indexers/txindex.go index d08db037..06c88d7f 100644 --- a/blockchain/indexers/txindex.go +++ b/blockchain/indexers/txindex.go @@ -294,7 +294,7 @@ var _ Indexer = (*TxIndex)(nil) // disconnecting blocks. // // This is part of the Indexer interface. 
-func (idx *TxIndex) Init(_ *blockchain.BlockChain) error { +func (idx *TxIndex) Init(_ *blockchain.BlockChain, _ *chainhash.Hash, _ int32) error { // Find the latest known block id field for the internal block id // index and initialize it. This is done because it's a lot more // efficient to do a single search at initialize time than it is to diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 20ba77ba..64fa948a 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -76,7 +76,9 @@ func (idx *UtreexoProofIndex) NeedsInputs() bool { // Init initializes the utreexo proof index. This is part of the Indexer // interface. -func (idx *UtreexoProofIndex) Init(chain *blockchain.BlockChain) error { +func (idx *UtreexoProofIndex) Init(chain *blockchain.BlockChain, + tipHash *chainhash.Hash, tipHeight int32) error { + idx.chain = chain // Init Utreexo State. From 7c5645c5c89020344f436a96b319fdfc17a2f2cb Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Sat, 13 Jul 2024 16:01:45 +0900 Subject: [PATCH 30/50] indexers: flush utreexo state on block disconnects Always flush the utreexo state on block disconnects. This allows the utreexo state to always stay at a recoverable state. Even if the ffldb fails to write, we can always re-connect blocks and reorganize. However, if the utreexo state isn't flushed, we can't disconnect on restart as the data could have been deleted from ffldb. 
--- blockchain/indexers/flatutreexoproofindex.go | 7 +++++++ blockchain/indexers/utreexoproofindex.go | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index d07bbcb5..6e4511c5 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -779,6 +779,13 @@ func (idx *FlatUtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcut return err } + // Always flush the utreexo state on block disconnects to never leave the utreexoState + // in an unrecoverable state. + err = idx.FlushUtreexoState(&block.MsgBlock().Header.PrevBlock) + if err != nil { + return err + } + // Check if we're at a height where proof was generated. Only check if we're not // pruned as we don't keep the historical proofs as a pruned node. if (block.Height()%idx.proofGenInterVal) == 0 && !idx.config.Pruned { diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 64fa948a..0529525d 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -375,6 +375,13 @@ func (idx *UtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.B return err } + // Always flush the utreexo state on block disconnects to never leave the utreexoState + // in an unrecoverable state. + err = idx.FlushUtreexoState(&block.MsgBlock().Header.PrevBlock) + if err != nil { + return err + } + err = dbDeleteUtreexoState(dbTx, block.Hash()) if err != nil { return err From 853a0de02c054e3b0314070feb281a6802f84587 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Sat, 13 Jul 2024 16:26:13 +0900 Subject: [PATCH 31/50] ffldb, database: add Flush Flush allows a caller to flush the cached internals of the database. This is useful for keeping the two separate databases (ffldb and utreexo state db) in sync. 
--- database/ffldb/db.go | 16 ++++++++++++++++ database/interface.go | 3 +++ 2 files changed, 19 insertions(+) diff --git a/database/ffldb/db.go b/database/ffldb/db.go index 93523748..3612a93a 100644 --- a/database/ffldb/db.go +++ b/database/ffldb/db.go @@ -2288,6 +2288,22 @@ func (db *db) Update(fn func(database.Tx) error) error { return tx.Commit() } +// Flush flushes the internal cache of the database to the disk. +// +// This function is part of the database.DB interface implementation. +func (db *db) Flush() error { + // Since all transactions have a read lock on this mutex, this will + // cause Flush to wait for all readers to complete. + db.closeLock.Lock() + defer db.closeLock.Unlock() + + if db.closed { + return makeDbErr(database.ErrDbNotOpen, errDbNotOpenStr, nil) + } + + return db.cache.flush() +} + // Close cleanly shuts down the database and syncs all data. It will block // until all database transactions have been finalized (rolled back or // committed). diff --git a/database/interface.go b/database/interface.go index 058c90cd..31340752 100644 --- a/database/interface.go +++ b/database/interface.go @@ -504,6 +504,9 @@ type DB interface { // user-supplied function will result in a panic. Update(fn func(tx Tx) error) error + // Flush flushes the internal cache of the database to the disk. + Flush() error + // Close cleanly shuts down the database and syncs all data. It will // block until all database transactions have been finalized (rolled // back or committed). From 01310945551ebf296d2cc118ce42b248425d579b Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 15 Jul 2024 18:00:36 +0900 Subject: [PATCH 32/50] main: increase the default memory used for utreexo proof index by 2x Utreexo proof indexes used to use the same amount of memory as the utxocache but since the utreexo proof index's entire size is basically 2 times the utxo cache, it makes sense to increase it by that much to minimize the db fetches. 
--- config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.go b/config.go index 59f21754..b43710bc 100644 --- a/config.go +++ b/config.go @@ -490,7 +490,7 @@ func loadConfig() (*config, []string, error) { MaxOrphanTxs: defaultMaxOrphanTransactions, SigCacheMaxSize: defaultSigCacheMaxSize, UtxoCacheMaxSizeMiB: defaultUtxoCacheMaxSizeMiB, - UtreexoProofIndexMaxMemory: defaultUtxoCacheMaxSizeMiB, + UtreexoProofIndexMaxMemory: defaultUtxoCacheMaxSizeMiB * 2, Generate: defaultGenerate, TxIndex: defaultTxIndex, TTLIndex: defaultTTLIndex, From edc65f8c30ac2b8c1a460281ad3e839a8fdfa0d3 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 15 Jul 2024 18:35:00 +0900 Subject: [PATCH 33/50] indexers: add initConsistentState to utreexo state The utreexo state may be behind the index tip if the node had an unexpected shutdown. Calling initConsistentState syncs up the utreexo state so that it stays consistent with the indexer tip. --- blockchain/indexers/flatutreexoproofindex.go | 2 +- blockchain/indexers/utreexobackend.go | 95 +++++++++++++++++++- blockchain/indexers/utreexoproofindex.go | 2 +- 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index 6e4511c5..5f245e94 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -121,7 +121,7 @@ func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain, idx.chain = chain // Init Utreexo State. 
- uState, err := InitUtreexoState(idx.config) + uState, err := InitUtreexoState(idx.config, chain, tipHash, tipHeight) if err != nil { return err } diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 145bc1b1..0e872003 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -7,6 +7,7 @@ package indexers import ( "bytes" "encoding/binary" + "fmt" "os" "path/filepath" @@ -16,6 +17,7 @@ import ( "github.com/utreexo/utreexod/chaincfg" "github.com/utreexo/utreexod/chaincfg/chainhash" "github.com/utreexo/utreexod/database" + "github.com/utreexo/utreexod/wire" ) const ( @@ -399,11 +401,94 @@ func deserializeUndoBlock(serialized []byte) (uint64, []uint64, []utreexo.Hash, return numAdds, targets, delHashes, nil } +// initConsistentUtreexoState makes the utreexo state consistent with the given tipHash. +func (us *UtreexoState) initConsistentUtreexoState(chain *blockchain.BlockChain, + savedHash, tipHash *chainhash.Hash, tipHeight int32) error { + + // This is a new accumulator state that we're working with. + var empty chainhash.Hash + if tipHeight == -1 && tipHash.IsEqual(&empty) { + return nil + } + + // We're all caught up if both of the hashes are equal. + if savedHash != nil && savedHash.IsEqual(tipHash) { + return nil + } + + currentHeight := int32(-1) + if savedHash != nil { + // Even though this should always be true, make sure the fetched hash is in + // the best chain. + if !chain.MainChainHasBlock(savedHash) { + return fmt.Errorf("last utreexo consistency status contains "+ + "hash that is not in best chain: %v", savedHash) + } + + var err error + currentHeight, err = chain.BlockHeightByHash(savedHash) + if err != nil { + return err + } + + if currentHeight > tipHeight { + return fmt.Errorf("Saved besthash has a heigher height "+ + "of %v than tip height of %v. 
The utreexo state is NOT "+ + "recoverable and should be dropped and reindexed", + currentHeight, tipHeight) + } + } else { + // Mark it as an empty hash for logging below. + savedHash = new(chainhash.Hash) + } + + log.Infof("Reconstructing the Utreexo state after an unclean shutdown. The Utreexo state is "+ + "consistent at block %s (%d) but the index tip is at block %s (%d), This may "+ + "take a long time...", savedHash.String(), currentHeight, tipHash.String(), tipHeight) + + for h := currentHeight + 1; h <= tipHeight; h++ { + block, err := chain.BlockByHeight(h) + if err != nil { + return err + } + + stxos, err := chain.FetchSpendJournal(block) + if err != nil { + return err + } + + _, outCount, inskip, outskip := blockchain.DedupeBlock(block) + dels, _, err := blockchain.BlockToDelLeaves(stxos, chain, block, inskip, -1) + if err != nil { + return err + } + adds := blockchain.BlockToAddLeaves(block, outskip, nil, outCount) + + ud, err := wire.GenerateUData(dels, us.state) + if err != nil { + return err + } + delHashes := make([]utreexo.Hash, len(ud.LeafDatas)) + for i := range delHashes { + delHashes[i] = ud.LeafDatas[i].LeafHash() + } + + err = us.state.Modify(adds, delHashes, ud.AccProof) + if err != nil { + return err + } + } + + return nil +} + // InitUtreexoState returns an initialized utreexo state. If there isn't an // existing state on disk, it creates one and returns it. // maxMemoryUsage of 0 will keep every element on disk. A negaive maxMemoryUsage will // load every element to the memory. 
-func InitUtreexoState(cfg *UtreexoConfig) (*UtreexoState, error) { +func InitUtreexoState(cfg *UtreexoConfig, chain *blockchain.BlockChain, + tipHash *chainhash.Hash, tipHeight int32) (*UtreexoState, error) { + log.Infof("Initializing Utreexo state from '%s'", utreexoBasePath(cfg)) defer log.Info("Utreexo state loaded") @@ -427,7 +512,7 @@ func InitUtreexoState(cfg *UtreexoConfig) (*UtreexoState, error) { return nil, err } - _, numLeaves, err := dbFetchUtreexoStateConsistency(db) + savedHash, numLeaves, err := dbFetchUtreexoStateConsistency(db) if err != nil { return nil, err } @@ -517,5 +602,11 @@ func InitUtreexoState(cfg *UtreexoConfig) (*UtreexoState, error) { flush: flush, } + // Make sure that the utreexo state is consistent before returning it. + err = uState.initConsistentUtreexoState(chain, savedHash, tipHash, tipHeight) + if err != nil { + return nil, err + } + return uState, err } diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 0529525d..e7dc11f9 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -82,7 +82,7 @@ func (idx *UtreexoProofIndex) Init(chain *blockchain.BlockChain, idx.chain = chain // Init Utreexo State. - uState, err := InitUtreexoState(idx.config) + uState, err := InitUtreexoState(idx.config, chain, tipHash, tipHeight) if err != nil { return err } From 16263364ccbeceb9c37f489df0b92e5615a15ba3 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 15:37:06 +0900 Subject: [PATCH 34/50] indexers, main: expose ffldb flush to the utreexo state The main database has a cache where the data is written to and this must be flushed as well in order for the utreexo state to be recoverable. Allowing access to the main db flush lets us keep the utreexo state consistent.
--- blockchain/indexers/flatutreexoproofindex.go | 3 ++- blockchain/indexers/indexers_test.go | 10 +++++----- blockchain/indexers/utreexobackend.go | 3 +++ blockchain/indexers/utreexoproofindex.go | 3 ++- server.go | 4 ++-- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index 5f245e94..9de2abed 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -1271,7 +1271,7 @@ func loadFlatFileState(dataDir, name string) (*FlatFileState, error) { // turn is used by the blockchain package. This allows the index to be // seamlessly maintained along with the chain. func NewFlatUtreexoProofIndex(pruned bool, chainParams *chaincfg.Params, - proofGenInterVal *int32, maxMemoryUsage int64, dataDir string) (*FlatUtreexoProofIndex, error) { + proofGenInterVal *int32, maxMemoryUsage int64, dataDir string, flush func() error) (*FlatUtreexoProofIndex, error) { // If the proofGenInterVal argument is nil, use the default value. 
var intervalToUse int32 @@ -1290,6 +1290,7 @@ func NewFlatUtreexoProofIndex(pruned bool, chainParams *chaincfg.Params, Pruned: pruned, DataDir: dataDir, Name: flatUtreexoProofIndexType, + FlushMainDB: flush, }, } diff --git a/blockchain/indexers/indexers_test.go b/blockchain/indexers/indexers_test.go index 21d5183e..de53b507 100644 --- a/blockchain/indexers/indexers_test.go +++ b/blockchain/indexers/indexers_test.go @@ -70,17 +70,17 @@ func createDB(dbName string) (database.DB, string, error) { return db, dbPath, nil } -func initIndexes(interval int32, dbPath string, db *database.DB, params *chaincfg.Params) ( +func initIndexes(interval int32, dbPath string, db database.DB, params *chaincfg.Params) ( *Manager, []Indexer, error) { proofGenInterval := new(int32) *proofGenInterval = interval - flatUtreexoProofIndex, err := NewFlatUtreexoProofIndex(false, params, proofGenInterval, 50*1024*1024, dbPath) + flatUtreexoProofIndex, err := NewFlatUtreexoProofIndex(false, params, proofGenInterval, 50*1024*1024, dbPath, db.Flush) if err != nil { return nil, nil, err } - utreexoProofIndex, err := NewUtreexoProofIndex(*db, false, 50*1024*1024, params, dbPath) + utreexoProofIndex, err := NewUtreexoProofIndex(db, false, 50*1024*1024, params, dbPath, db.Flush) if err != nil { return nil, nil, err } @@ -89,7 +89,7 @@ func initIndexes(interval int32, dbPath string, db *database.DB, params *chaincf utreexoProofIndex, flatUtreexoProofIndex, } - indexManager := NewManager(*db, indexes) + indexManager := NewManager(db, indexes) return indexManager, indexes, nil } @@ -109,7 +109,7 @@ func indexersTestChain(testName string, proofGenInterval int32) (*blockchain.Blo } // Create the indexes to be used in the chain. 
- indexManager, indexes, err := initIndexes(proofGenInterval, dbPath, &db, &params) + indexManager, indexes, err := initIndexes(proofGenInterval, dbPath, db, &params) if err != nil { tearDown() os.RemoveAll(testDbRoot) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 0e872003..1bae823c 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -53,6 +53,9 @@ type UtreexoConfig struct { // Name is what the type of utreexo proof indexer this utreexo state is related // to. Name string + + // FlushMainDB flushes the main database where all the data is stored. + FlushMainDB func() error } // UtreexoState is a wrapper around the raw accumulator with configuration diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index e7dc11f9..0d6649fb 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -587,7 +587,7 @@ func (idx *UtreexoProofIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash. // turn is used by the blockchain package. This allows the index to be // seamlessly maintained along with the chain.
func NewUtreexoProofIndex(db database.DB, pruned bool, maxMemoryUsage int64, - chainParams *chaincfg.Params, dataDir string) (*UtreexoProofIndex, error) { + chainParams *chaincfg.Params, dataDir string, flush func() error) (*UtreexoProofIndex, error) { idx := &UtreexoProofIndex{ db: db, @@ -598,6 +598,7 @@ func NewUtreexoProofIndex(db database.DB, pruned bool, maxMemoryUsage int64, Pruned: pruned, DataDir: dataDir, Name: db.Type(), + FlushMainDB: flush, }, } diff --git a/server.go b/server.go index b83a5b7e..ea7865ee 100644 --- a/server.go +++ b/server.go @@ -3239,7 +3239,7 @@ func newServer(listenAddrs, agentBlacklist, agentWhitelist []string, var err error s.utreexoProofIndex, err = indexers.NewUtreexoProofIndex( db, cfg.Prune != 0, cfg.UtreexoProofIndexMaxMemory*1024*1024, - chainParams, cfg.DataDir) + chainParams, cfg.DataDir, db.Flush) if err != nil { return nil, err } @@ -3256,7 +3256,7 @@ func newServer(listenAddrs, agentBlacklist, agentWhitelist []string, var err error s.flatUtreexoProofIndex, err = indexers.NewFlatUtreexoProofIndex( cfg.Prune != 0, chainParams, interval, - cfg.UtreexoProofIndexMaxMemory*1024*1024, cfg.DataDir) + cfg.UtreexoProofIndexMaxMemory*1024*1024, cfg.DataDir, db.Flush) if err != nil { return nil, err } From 6dd9c8a6a0f96c1e2ec4b86a67dd05d62bbdd3b1 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 15:58:06 +0900 Subject: [PATCH 35/50] blockchain: export UtxoFlushPeriodicInterval Exported so that the utreexo state can use the same variable for flushing the utreexo state. 
--- blockchain/utxocache.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/blockchain/utxocache.go b/blockchain/utxocache.go index 49872bde..37bc70ef 100644 --- a/blockchain/utxocache.go +++ b/blockchain/utxocache.go @@ -180,11 +180,11 @@ func (ms *mapSlice) deleteMaps() { } const ( - // utxoFlushPeriodicInterval is the interval at which a flush is performed + // UtxoFlushPeriodicInterval is the interval at which a flush is performed // when the flush mode FlushPeriodic is used. This is used when the initial // block download is complete and it's useful to flush periodically in case // of unforseen shutdowns. - utxoFlushPeriodicInterval = time.Minute * 5 + UtxoFlushPeriodicInterval = time.Minute * 5 ) // FlushMode is used to indicate the different urgency types for a flush. @@ -563,7 +563,7 @@ func (s *utxoCache) flush(dbTx database.Tx, mode FlushMode, bestState *BestState case FlushPeriodic: // If the time since the last flush is over the periodic interval, // force a flush. Otherwise just flush when the cache is full. 
- if time.Since(s.lastFlushTime) > utxoFlushPeriodicInterval { + if time.Since(s.lastFlushTime) > UtxoFlushPeriodicInterval { threshold = 0 } else { threshold = s.maxTotalMemoryUsage From c70b4e4135536838987ef4b11f81d1604fdb58fa Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 16:03:18 +0900 Subject: [PATCH 36/50] indexers: add flush to utreexo state --- blockchain/indexers/utreexobackend.go | 105 ++++++++++++-------------- 1 file changed, 47 insertions(+), 58 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 1bae823c..e9b03c71 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -65,8 +65,37 @@ type UtreexoState struct { state utreexo.Utreexo utreexoStateDB *leveldb.DB - isFlushNeeded func() bool - flush func(ldbTx *leveldb.Transaction) error + isFlushNeeded func() bool + flushLeavesAndNodes func(ldbTx *leveldb.Transaction) error +} + +// flush flushes the utreexo state and all the data necessary for the utreexo state to be recoverable +// on sudden crashes. +func (us *UtreexoState) flush(bestHash *chainhash.Hash) error { + ldbTx, err := us.utreexoStateDB.OpenTransaction() + if err != nil { + return err + } + + // Write the best block hash and the numleaves for the utreexo state. 
+ err = dbWriteUtreexoStateConsistency(ldbTx, bestHash, us.state.GetNumLeaves()) + if err != nil { + return err + } + + err = us.flushLeavesAndNodes(ldbTx) + if err != nil { + ldbTx.Discard() + return err + } + + err = ldbTx.Commit() + if err != nil { + ldbTx.Discard() + return err + } + + return nil } // utreexoBasePath returns the base path of where the utreexo state should be @@ -204,33 +233,7 @@ func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error defer idx.mtx.Unlock() log.Infof("Flushing the utreexo state to disk...") - - ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() - if err != nil { - return err - } - - // Write the best block hash and the numleaves for the utreexo state. - err = dbWriteUtreexoStateConsistency(ldbTx, bestHash, idx.utreexoState.state.GetNumLeaves()) - if err != nil { - return err - } - - err = idx.utreexoState.flush(ldbTx) - if err != nil { - ldbTx.Discard() - return err - } - - err = ldbTx.Commit() - if err != nil { - ldbTx.Discard() - return err - } - - log.Infof("Finished flushing the utreexo state to disk.") - - return nil + return idx.utreexoState.flush(bestHash) } // CloseUtreexoState flushes and closes the utreexo database state. @@ -255,30 +258,8 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) er idx.mtx.Lock() defer idx.mtx.Unlock() - ldbTx, err := idx.utreexoState.utreexoStateDB.OpenTransaction() - if err != nil { - return err - } - - // Write the best block hash and the numleaves for the utreexo state. 
- err = dbWriteUtreexoStateConsistency(ldbTx, bestHash, idx.utreexoState.state.GetNumLeaves()) - if err != nil { - return err - } - - err = idx.utreexoState.flush(ldbTx) - if err != nil { - ldbTx.Discard() - return err - } - - err = ldbTx.Commit() - if err != nil { - ldbTx.Discard() - return err - } - - return nil + log.Infof("Flushing the utreexo state to disk...") + return idx.utreexoState.flush(bestHash) } // CloseUtreexoState flushes and closes the utreexo database state. @@ -480,6 +461,14 @@ func (us *UtreexoState) initConsistentUtreexoState(chain *blockchain.BlockChain, if err != nil { return err } + + if us.isFlushNeeded() { + log.Infof("Flushing the utreexo state to disk...") + err = us.flush(block.Hash()) + if err != nil { + return err + } + } } return nil @@ -598,11 +587,11 @@ func InitUtreexoState(cfg *UtreexoConfig, chain *blockchain.BlockChain, } uState := &UtreexoState{ - config: cfg, - state: &p, - utreexoStateDB: db, - isFlushNeeded: isFlushNeeded, - flush: flush, + config: cfg, + state: &p, + utreexoStateDB: db, + isFlushNeeded: isFlushNeeded, + flushLeavesAndNodes: flush, } // Make sure that the utreexo state is consistent before returning it. From 746389e540ff33abaab69fdd62895b4edc48761f Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 16:13:57 +0900 Subject: [PATCH 37/50] indexers: refactor utreexo state flushes The new flushes are able to support different modes which flush the utreexo state when it meets various conditions. The flushes that were also called on ConnectBlocks are removed so that they can be controlled by outside callers. 
--- blockchain/indexers/flatutreexoproofindex.go | 10 +- blockchain/indexers/utreexobackend.go | 99 +++++++++++++++++--- blockchain/indexers/utreexoproofindex.go | 10 +- 3 files changed, 97 insertions(+), 22 deletions(-) diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index 9de2abed..f5f6e3af 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -13,6 +13,7 @@ import ( "path/filepath" "reflect" "sync" + "time" "github.com/utreexo/utreexo" "github.com/utreexo/utreexod/blockchain" @@ -103,6 +104,9 @@ type FlatUtreexoProofIndex struct { // pStats are the proof size statistics that are kept for research purposes. pStats proofStats + + // The time of when the utreexo state was last flushed. + lastFlushTime time.Time } // NeedsInputs signals that the index requires the referenced inputs in order @@ -324,10 +328,6 @@ func (idx *FlatUtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil. if err != nil { return err } - err = idx.FlushUtreexoStateIfNeeded(block.Hash()) - if err != nil { - log.Warnf("error while flushing the utreexo state. %v", err) - } // Don't store proofs if the node is pruned. if idx.config.Pruned { @@ -781,7 +781,7 @@ func (idx *FlatUtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcut // Always flush the utreexo state on flushes to never leave the utreexoState // at an unrecoverable state. 
- err = idx.FlushUtreexoState(&block.MsgBlock().Header.PrevBlock) + err = idx.flushUtreexoState(&block.MsgBlock().Header.PrevBlock) if err != nil { return err } diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index e9b03c71..aca60720 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -10,6 +10,7 @@ import ( "fmt" "os" "path/filepath" + "time" "github.com/syndtr/goleveldb/leveldb" "github.com/utreexo/utreexo" @@ -219,16 +220,53 @@ func (idx *FlatUtreexoProofIndex) FetchUtreexoState(blockHeight int32) ([]*chain return chainhashRoots, stump.NumLeaves, nil } -// FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. -func (idx *UtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) error { - if idx.utreexoState.isFlushNeeded() { - return idx.FlushUtreexoState(bestHash) +// Flush flushes the utreexo state. The different modes pass in as an argument determine if the utreexo state +// will be flushed or not. +// +// The onConnect bool is if the Flush is called on a block connect or a disconnect. +// It's important as it determines if we flush the main node db before attempting to flush the utreexo state. +// For the utreexo state to be recoverable, it has to be behind whatever tip the main database is at. +// On block connects, we always want to flush first but on disconnects, we want to flush first before the +// data necessary undo data is removed. +func (idx *UtreexoProofIndex) Flush(bestHash *chainhash.Hash, mode blockchain.FlushMode, onConnect bool) error { + switch mode { + case blockchain.FlushPeriodic: + // If the time since the last flush less then the interval, just return. + if time.Since(idx.lastFlushTime) < blockchain.UtxoFlushPeriodicInterval { + return nil + } + case blockchain.FlushIfNeeded: + if !idx.utreexoState.isFlushNeeded() { + return nil + } + case blockchain.FlushRequired: + // Purposely left empty. 
+ } + + if onConnect { + // Flush the main database first. This is because the block and other data may still + // be in the database cache. If we flush the utreexo state before, there's no way to + // undo the utreexo state to the last block where the main database flushed. Flushing + // this before we flush the utreexo state ensures that we leave the database state at + // a recoverable state. + // + // This is different from on disconnect as you want the utreexo state to be flushed + // first as the utreexo state can always catch up to the main db tip but can't undo + // without the main database data. + err := idx.config.FlushMainDB() + if err != nil { + return err + } + } + err := idx.flushUtreexoState(bestHash) + if err != nil { + return err } return nil } // FlushUtreexoState saves the utreexo state to disk. -func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error { +func (idx *UtreexoProofIndex) flushUtreexoState(bestHash *chainhash.Hash) error { idx.mtx.Lock() defer idx.mtx.Unlock() @@ -238,23 +276,60 @@ func (idx *UtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error // CloseUtreexoState flushes and closes the utreexo database state. func (idx *UtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { - err := idx.FlushUtreexoState(bestHash) + err := idx.flushUtreexoState(bestHash) if err != nil { log.Warnf("error whiling flushing the utreexo state. %v", err) } return idx.utreexoState.utreexoStateDB.Close() } -// FlushUtreexoStateIfNeeded flushes the utreexo state only if the cache is full. -func (idx *FlatUtreexoProofIndex) FlushUtreexoStateIfNeeded(bestHash *chainhash.Hash) error { - if idx.utreexoState.isFlushNeeded() { - return idx.FlushUtreexoState(bestHash) +// Flush flushes the utreexo state. The different modes pass in as an argument determine if the utreexo state +// will be flushed or not. +// +// The onConnect bool is if the Flush is called on a block connect or a disconnect. 
+// It's important as it determines if we flush the main node db before attempting to flush the utreexo state. +// For the utreexo state to be recoverable, it has to be behind whatever tip the main database is at. +// On block connects, we always want to flush first but on disconnects, we want to flush first before the +// data necessary undo data is removed. +func (idx *FlatUtreexoProofIndex) Flush(bestHash *chainhash.Hash, mode blockchain.FlushMode, onConnect bool) error { + switch mode { + case blockchain.FlushPeriodic: + // If the time since the last flush less then the interval, just return. + if time.Since(idx.lastFlushTime) < blockchain.UtxoFlushPeriodicInterval { + return nil + } + case blockchain.FlushIfNeeded: + if !idx.utreexoState.isFlushNeeded() { + return nil + } + case blockchain.FlushRequired: + // Purposely left empty. + } + + if onConnect { + // Flush the main database first. This is because the block and other data may still + // be in the database cache. If we flush the utreexo state before, there's no way to + // undo the utreexo state to the last block where the main database flushed. Flushing + // this before we flush the utreexo state ensures that we leave the database state at + // a recoverable state. + // + // This is different from on disconnect as you want the utreexo state to be flushed + // first as the utreexo state can always catch up to the main db tip but can't undo + // without the main database data. + err := idx.config.FlushMainDB() + if err != nil { + return err + } + } + err := idx.flushUtreexoState(bestHash) + if err != nil { + return err } return nil } // FlushUtreexoState saves the utreexo state to disk. 
-func (idx *FlatUtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) error { +func (idx *FlatUtreexoProofIndex) flushUtreexoState(bestHash *chainhash.Hash) error { idx.mtx.Lock() defer idx.mtx.Unlock() @@ -264,7 +339,7 @@ func (idx *FlatUtreexoProofIndex) FlushUtreexoState(bestHash *chainhash.Hash) er // CloseUtreexoState flushes and closes the utreexo database state. func (idx *FlatUtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { - err := idx.FlushUtreexoState(bestHash) + err := idx.flushUtreexoState(bestHash) if err != nil { log.Warnf("error whiling flushing the utreexo state. %v", err) } diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index 0d6649fb..b48df680 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -8,6 +8,7 @@ import ( "bytes" "fmt" "sync" + "time" "github.com/utreexo/utreexo" "github.com/utreexo/utreexod/blockchain" @@ -64,6 +65,9 @@ type UtreexoProofIndex struct { // utreexoState represents the Bitcoin UTXO set as a utreexo accumulator. // It keeps all the elements of the forest in order to generate proofs. utreexoState *UtreexoState + + // The time of when the utreexo state was last flushed. + lastFlushTime time.Time } // NeedsInputs signals that the index requires the referenced inputs in order @@ -305,10 +309,6 @@ func (idx *UtreexoProofIndex) ConnectBlock(dbTx database.Tx, block *btcutil.Bloc if err != nil { return err } - err = idx.FlushUtreexoStateIfNeeded(block.Hash()) - if err != nil { - log.Warnf("error while flushing the utreexo state. %v", err) - } return nil } @@ -377,7 +377,7 @@ func (idx *UtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.B // Always flush the utreexo state on flushes to never leave the utreexoState // at an unrecoverable state. 
- err = idx.FlushUtreexoState(&block.MsgBlock().Header.PrevBlock) + err = idx.flushUtreexoState(&block.MsgBlock().Header.PrevBlock) if err != nil { return err } From 64ac1beb20c44ccec9c6bba3ba6f1066baebf319 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 16:20:35 +0900 Subject: [PATCH 38/50] indexers, main: refactor CloseUtreexoState CloseUtreexoState asked for the best hash because the best hash fetched when catching up the index was different from the one you get from the best snapshot. This is solved by not calling CloseUtreexoState but calling the internal methods directly. This simplifies calling CloseUtreexoState for the callers. --- blockchain/indexers/indexers_test.go | 5 ++--- blockchain/indexers/manager.go | 26 +++++++++++++++++++++----- blockchain/indexers/utreexobackend.go | 10 ++++++---- server.go | 4 ++-- 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/blockchain/indexers/indexers_test.go b/blockchain/indexers/indexers_test.go index de53b507..5c0ab9fc 100644 --- a/blockchain/indexers/indexers_test.go +++ b/blockchain/indexers/indexers_test.go @@ -1045,16 +1045,15 @@ func TestBridgeNodePruneUndoDataGen(t *testing.T) { // Close the databases so that they can be initialized again // to generate the undo data. 
- bestHash := chain.BestSnapshot().Hash for _, indexer := range indexes { switch idxType := indexer.(type) { case *FlatUtreexoProofIndex: - err = idxType.CloseUtreexoState(&bestHash) + err := idxType.CloseUtreexoState() if err != nil { t.Fatal(err) } case *UtreexoProofIndex: - err = idxType.CloseUtreexoState(&bestHash) + err := idxType.CloseUtreexoState() if err != nil { t.Fatal(err) } diff --git a/blockchain/indexers/manager.go b/blockchain/indexers/manager.go index b9888bb8..a8df4f20 100644 --- a/blockchain/indexers/manager.go +++ b/blockchain/indexers/manager.go @@ -483,15 +483,23 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) for _, indexer := range m.enabledIndexes { switch idxType := indexer.(type) { case *UtreexoProofIndex: - err := idxType.CloseUtreexoState(block.Hash()) + err := idxType.flushUtreexoState(block.Hash()) + if err != nil { + log.Errorf("Error while flushing utreexo state: %v", err) + } + err = idxType.utreexoState.utreexoStateDB.Close() if err != nil { log.Errorf("Error while flushing utreexo state: %v", err) } case *FlatUtreexoProofIndex: - err := idxType.CloseUtreexoState(block.Hash()) + err := idxType.flushUtreexoState(block.Hash()) if err != nil { log.Errorf("Error while flushing utreexo state for flat utreexo proof index: %v", err) } + err = idxType.utreexoState.utreexoStateDB.Close() + if err != nil { + log.Errorf("Error while closing the utreexo state for flat utreexo proof index: %v", err) + } } } return errInterruptRequested @@ -534,15 +542,23 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) for _, indexer := range m.enabledIndexes { switch idxType := indexer.(type) { case *UtreexoProofIndex: - err := idxType.CloseUtreexoState(block.Hash()) + err := idxType.flushUtreexoState(block.Hash()) if err != nil { - log.Errorf("Error while flushing utreexo state: %v", err) + log.Errorf("Error while flushing utreexo state for utreexo proof index: %v", err) + } + err = 
idxType.utreexoState.utreexoStateDB.Close() + if err != nil { + log.Errorf("Error while closing the utreexo state for utreexo proof index: %v", err) } case *FlatUtreexoProofIndex: - err := idxType.CloseUtreexoState(block.Hash()) + err := idxType.flushUtreexoState(block.Hash()) if err != nil { log.Errorf("Error while flushing utreexo state for flat utreexo proof index: %v", err) } + err = idxType.utreexoState.utreexoStateDB.Close() + if err != nil { + log.Errorf("Error while closing the utreexo state for flat utreexo proof index: %v", err) + } } } return errInterruptRequested diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index aca60720..3c124e90 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -275,8 +275,9 @@ func (idx *UtreexoProofIndex) flushUtreexoState(bestHash *chainhash.Hash) error } // CloseUtreexoState flushes and closes the utreexo database state. -func (idx *UtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { - err := idx.flushUtreexoState(bestHash) +func (idx *UtreexoProofIndex) CloseUtreexoState() error { + bestHash := idx.chain.BestSnapshot().Hash + err := idx.flushUtreexoState(&bestHash) if err != nil { log.Warnf("error whiling flushing the utreexo state. %v", err) } @@ -338,8 +339,9 @@ func (idx *FlatUtreexoProofIndex) flushUtreexoState(bestHash *chainhash.Hash) er } // CloseUtreexoState flushes and closes the utreexo database state. -func (idx *FlatUtreexoProofIndex) CloseUtreexoState(bestHash *chainhash.Hash) error { - err := idx.flushUtreexoState(bestHash) +func (idx *FlatUtreexoProofIndex) CloseUtreexoState() error { + bestHash := idx.chain.BestSnapshot().Hash + err := idx.flushUtreexoState(&bestHash) if err != nil { log.Warnf("error whiling flushing the utreexo state. 
%v", err) } diff --git a/server.go b/server.go index ea7865ee..6079d69b 100644 --- a/server.go +++ b/server.go @@ -2574,7 +2574,7 @@ out: // If utreexoProofIndex option is on, flush it after closing down syncManager. if s.utreexoProofIndex != nil { - err := s.utreexoProofIndex.CloseUtreexoState(&s.chain.BestSnapshot().Hash) + err := s.utreexoProofIndex.CloseUtreexoState() if err != nil { btcdLog.Errorf("Error while flushing utreexo state: %v", err) } @@ -2582,7 +2582,7 @@ out: // If flatUtreexoProofIndex option is on, flush it after closing down syncManager. if s.flatUtreexoProofIndex != nil { - err := s.flatUtreexoProofIndex.CloseUtreexoState(&s.chain.BestSnapshot().Hash) + err := s.flatUtreexoProofIndex.CloseUtreexoState() if err != nil { btcdLog.Errorf("Error while flushing utreexo state: %v", err) } From a1e008e6f39327b55e61af5f77ee77831e69f8bb Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 16:39:13 +0900 Subject: [PATCH 39/50] blockchain, indexers: add flush to indexer interface Flush is added to the indexer interface so that we can call the flush from when connecting the block in the blockchain package. This allows the utreexo indexes to also flush the main database. They couldn't when being called from the ConnectBlock on the indexers as that acquires the database tx lock. --- blockchain/chain.go | 11 +++++++++++ blockchain/indexers/addrindex.go | 7 +++++++ blockchain/indexers/cfindex.go | 7 +++++++ blockchain/indexers/common.go | 3 +++ blockchain/indexers/manager.go | 18 ++++++++++++++++++ blockchain/indexers/ttlindex.go | 7 +++++++ blockchain/indexers/txindex.go | 7 +++++++ 7 files changed, 60 insertions(+) diff --git a/blockchain/chain.go b/blockchain/chain.go index 5287ca0c..992de321 100644 --- a/blockchain/chain.go +++ b/blockchain/chain.go @@ -760,6 +760,14 @@ func (b *BlockChain) connectBlock(node *blockNode, block *btcutil.Block, return err } + // Flush the indexes if they need to be flushed. 
+ if b.indexManager != nil { + err := b.indexManager.Flush(&state.Hash, FlushIfNeeded, true) + if err != nil { + return err + } + } + // Prune fully spent entries and mark all entries in the view unmodified // now that the modifications have been committed to the database. if view != nil { @@ -2328,6 +2336,9 @@ type IndexManager interface { // PruneBlock is invoked when an older block is deleted after it's been // processed. This lowers the storage requirement for a node. PruneBlocks(database.Tx, int32, func(int32) (*chainhash.Hash, error)) error + + // Flush flushes the relevant indexes if they need to be flushed. + Flush(*chainhash.Hash, FlushMode, bool) error } // Config is a descriptor which specifies the blockchain instance configuration. diff --git a/blockchain/indexers/addrindex.go b/blockchain/indexers/addrindex.go index 61ad1e98..1fa678d5 100644 --- a/blockchain/indexers/addrindex.go +++ b/blockchain/indexers/addrindex.go @@ -820,6 +820,13 @@ func (idx *AddrIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) er return nil } +// For AddrIndex, flush is a no-op. +// +// This is part of the Indexer interface. +func (idx *AddrIndex) Flush(_ *chainhash.Hash, _ blockchain.FlushMode, _ bool) error { + return nil +} + // TxRegionsForAddress returns a slice of block regions which identify each // transaction that involves the passed address according to the specified // number to skip, number requested, and whether or not the results should be diff --git a/blockchain/indexers/cfindex.go b/blockchain/indexers/cfindex.go index 681f6640..6ab2f093 100644 --- a/blockchain/indexers/cfindex.go +++ b/blockchain/indexers/cfindex.go @@ -286,6 +286,13 @@ func (idx *CfIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) erro return nil } +// For CfIndex, flush is a no-op. +// +// This is part of the Indexer interface. 
+func (idx *CfIndex) Flush(_ *chainhash.Hash, _ blockchain.FlushMode, _ bool) error { + return nil +} + // entryByBlockHash fetches a filter index entry of a particular type // (eg. filter, filter header, etc) for a filter type and block hash. func (idx *CfIndex) entryByBlockHash(filterTypeKeys [][]byte, diff --git a/blockchain/indexers/common.go b/blockchain/indexers/common.go index f830f5ce..6c3d0302 100644 --- a/blockchain/indexers/common.go +++ b/blockchain/indexers/common.go @@ -66,6 +66,9 @@ type Indexer interface { // PruneBlock is invoked when an older block is deleted after it's been // processed. PruneBlock(database.Tx, *chainhash.Hash) error + + // Flush flushes the index. + Flush(*chainhash.Hash, blockchain.FlushMode, bool) error } // AssertError identifies an error that indicates an internal code consistency diff --git a/blockchain/indexers/manager.go b/blockchain/indexers/manager.go index a8df4f20..2d1f3366 100644 --- a/blockchain/indexers/manager.go +++ b/blockchain/indexers/manager.go @@ -563,6 +563,12 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) } return errInterruptRequested } + + // Flush indexes if needed. + err = m.Flush(block.Hash(), blockchain.FlushIfNeeded, true) + if err != nil { + return err + } } log.Infof("Indexes caught up to height %d", bestHeight) @@ -694,6 +700,18 @@ func (m *Manager) PruneBlocks(dbTx database.Tx, lastKeptHeight int32, return nil } +// Flush flushes the enabled indexes. For the indexers that do not need to be flushed, it's a no-op. +func (m *Manager) Flush(bestHash *chainhash.Hash, mode blockchain.FlushMode, onConnect bool) error { + for _, index := range m.enabledIndexes { + err := index.Flush(bestHash, mode, onConnect) + if err != nil { + return err + } + } + + return nil +} + // NewManager returns a new index manager with the provided indexes enabled. 
// // The manager returned satisfies the blockchain.IndexManager interface and thus diff --git a/blockchain/indexers/ttlindex.go b/blockchain/indexers/ttlindex.go index ba35e091..7b3fc46f 100644 --- a/blockchain/indexers/ttlindex.go +++ b/blockchain/indexers/ttlindex.go @@ -101,6 +101,13 @@ func (idx *TTLIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) err return nil } +// For TTLIndex, flush is a no-op. +// +// This is part of the Indexer interface. +func (idx *TTLIndex) Flush(_ *chainhash.Hash, _ blockchain.FlushMode, _ bool) error { + return nil +} + // GetTTL returns a pointer to the ttl value of a transaction outpout. // Returns nil for a UTXO. // diff --git a/blockchain/indexers/txindex.go b/blockchain/indexers/txindex.go index 06c88d7f..1c8f3846 100644 --- a/blockchain/indexers/txindex.go +++ b/blockchain/indexers/txindex.go @@ -441,6 +441,13 @@ func (idx *TxIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) erro return nil } +// NOTE: For TxIndex, flush is a no-op. +// +// This is part of the Indexer interface. +func (idx *TxIndex) Flush(_ *chainhash.Hash, _ blockchain.FlushMode, _ bool) error { + return nil +} + // TxBlockRegion returns the block region for the provided transaction hash // from the transaction index. The block region can in turn be used to load the // raw transaction bytes. When there is no entry for the provided hash, nil From 19a6e7f7bdb0d388c12019fcd109eba190a7ee4b Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 17:34:18 +0900 Subject: [PATCH 40/50] indexers: get rid of one of the flushes when catching up the indexes There are two quit on interrupts when indexers are catching up. The first flush was problematic in that the flush would save the blockhash of a block that wasn't processed. Getting rid of this first flush solves this problem. 
--- blockchain/indexers/manager.go | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/blockchain/indexers/manager.go b/blockchain/indexers/manager.go index 2d1f3366..b1fbfcd7 100644 --- a/blockchain/indexers/manager.go +++ b/blockchain/indexers/manager.go @@ -479,32 +479,6 @@ func (m *Manager) Init(chain *blockchain.BlockChain, interrupt <-chan struct{}) return err } - if interruptRequested(interrupt) { - for _, indexer := range m.enabledIndexes { - switch idxType := indexer.(type) { - case *UtreexoProofIndex: - err := idxType.flushUtreexoState(block.Hash()) - if err != nil { - log.Errorf("Error while flushing utreexo state: %v", err) - } - err = idxType.utreexoState.utreexoStateDB.Close() - if err != nil { - log.Errorf("Error while flushing utreexo state: %v", err) - } - case *FlatUtreexoProofIndex: - err := idxType.flushUtreexoState(block.Hash()) - if err != nil { - log.Errorf("Error while flushing utreexo state for flat utreexo proof index: %v", err) - } - err = idxType.utreexoState.utreexoStateDB.Close() - if err != nil { - log.Errorf("Error while closing the utreexo state for flat utreexo proof index: %v", err) - } - } - } - return errInterruptRequested - } - // Connect the block for all indexes that need it. var spentTxos []blockchain.SpentTxOut for i, indexer := range m.enabledIndexes { From b8d85e6f22da46388702c2391bcf7a665a97c21a Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 16 Jul 2024 17:48:07 +0900 Subject: [PATCH 41/50] indexers: add upgrade code for older utreexo states The older utreexo states used to save the numLeaves to the flatfiles. We read the numLeaves from the flatfile and save it to the database. 
--- blockchain/indexers/utreexobackend.go | 112 +++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 3 deletions(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index 3c124e90..d43f66fb 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -24,8 +24,11 @@ import ( const ( // utreexoDirName is the name of the directory in which the utreexo state // is stored. - utreexoDirName = "utreexostate" - defaultUtreexoFileName = "forest.dat" + utreexoDirName = "utreexostate" + + // oldDefaultUtreexoFileName is the file name of the utreexo state that the num leaves + // used to be stored in. + oldDefaultUtreexoFileName = "forest.dat" ) var ( @@ -120,7 +123,7 @@ func deleteUtreexoState(path string) error { // checkUtreexoExists checks that the data for this utreexo state type specified // in the config is present and should be resumed off of. func checkUtreexoExists(cfg *UtreexoConfig, basePath string) bool { - path := filepath.Join(basePath, defaultUtreexoFileName) + path := filepath.Join(basePath, oldDefaultUtreexoFileName) _, err := os.Stat(path) if err != nil && os.IsNotExist(err) { return false @@ -462,6 +465,102 @@ func deserializeUndoBlock(serialized []byte) (uint64, []uint64, []utreexo.Hash, return numAdds, targets, delHashes, nil } +// upgradeUtreexoState upgrades the utreexo state to be atomic. +func upgradeUtreexoState(cfg *UtreexoConfig, p *utreexo.MapPollard, + db *leveldb.DB, bestHash *chainhash.Hash) error { + + // Check if the current database is an older database that needs to be upgraded. + if !checkUtreexoExists(cfg, utreexoBasePath(cfg)) { + return nil + } + + log.Infof("Upgrading the utreexo state database. Do NOT shut down this process. " + + "This may take a while...") + + // Write the nodes to the new database. 
+ nodesPath := filepath.Join(utreexoBasePath(cfg), "nodes") + nodesDB, err := leveldb.OpenFile(nodesPath, nil) + if err != nil { + return err + } + + ldbTx, err := db.OpenTransaction() + if err != nil { + return err + } + + iter := nodesDB.NewIterator(nil, nil) + for iter.Next() { + err = ldbTx.Put(iter.Key(), iter.Value(), nil) + if err != nil { + ldbTx.Discard() + return err + } + } + nodesDB.Close() + + // Write the cached leaves to the new database. + cachedLeavesPath := filepath.Join(utreexoBasePath(cfg), "cachedleaves") + cachedLeavesDB, err := leveldb.OpenFile(cachedLeavesPath, nil) + if err != nil { + return err + } + + iter = cachedLeavesDB.NewIterator(nil, nil) + for iter.Next() { + err = ldbTx.Put(iter.Key(), iter.Value(), nil) + if err != nil { + ldbTx.Discard() + return err + } + } + cachedLeavesDB.Close() + + // Open the file and read the numLeaves. + forestFilePath := filepath.Join(utreexoBasePath(cfg), oldDefaultUtreexoFileName) + file, err := os.OpenFile(forestFilePath, os.O_RDWR, 0400) + if err != nil { + return err + } + var buf [8]byte + _, err = file.Read(buf[:]) + if err != nil { + return err + } + + // Save the consistency state + p.NumLeaves = binary.LittleEndian.Uint64(buf[:8]) + err = dbWriteUtreexoStateConsistency(ldbTx, bestHash, p.NumLeaves) + if err != nil { + ldbTx.Discard() + return err + } + + // Commit all the writes to the database. + err = ldbTx.Commit() + if err != nil { + ldbTx.Discard() + return err + } + + // Remove the unnecessary file after the upgrade. + err = os.Remove(forestFilePath) + if err != nil { + return err + } + err = os.RemoveAll(cachedLeavesPath) + if err != nil { + return err + } + err = os.RemoveAll(nodesPath) + if err != nil { + return err + } + + log.Infof("Finished upgrading the utreexo state database.") + return nil +} + // initConsistentUtreexoState makes the utreexo state consistent with the given tipHash. 
func (us *UtreexoState) initConsistentUtreexoState(chain *blockchain.BlockChain, savedHash, tipHash *chainhash.Hash, tipHeight int32) error { @@ -581,6 +680,13 @@ func InitUtreexoState(cfg *UtreexoConfig, chain *blockchain.BlockChain, return nil, err } + // The utreexo state may be an older version where the numLeaves were stored in a flat + // file. Upgrade the utreexo state if it needs to be. + err = upgradeUtreexoState(cfg, &p, db, tipHash) + if err != nil { + return nil, err + } + savedHash, numLeaves, err := dbFetchUtreexoStateConsistency(db) if err != nil { return nil, err From cbff685af91c1076b7cab82ea75817e222ae30e6 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 17 Jul 2024 14:26:24 +0900 Subject: [PATCH 42/50] blockchain, indexers, netsync: add flush for Indexers FlushIndexes method is added to blockchain so that external callers can trigger flushes on the indexes. It's useful for periodic flushes where the node is already caught up to the tip but should be flushed to keep the node from being too far behind if there were to be an unexpected shutdown. --- blockchain/chain.go | 18 ++++++++++++++++++ blockchain/indexers/flatutreexoproofindex.go | 1 + blockchain/indexers/utreexobackend.go | 6 ++++++ blockchain/indexers/utreexoproofindex.go | 1 + netsync/manager.go | 7 ++++++- 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/blockchain/chain.go b/blockchain/chain.go index 992de321..78ed8a56 100644 --- a/blockchain/chain.go +++ b/blockchain/chain.go @@ -2341,6 +2341,24 @@ type IndexManager interface { Flush(*chainhash.Hash, FlushMode, bool) error } +// FlushUtxoCache flushes the indexes if a flush is needed with the given flush mode. +// If the flush is on a block connect and not a reorg, the onConnect bool should be true. +// +// This function is safe for concurrent access.
+func (b *BlockChain) FlushIndexes(mode FlushMode, onConnect bool) error { + b.chainLock.Lock() + defer b.chainLock.Unlock() + + if b.indexManager != nil { + err := b.indexManager.Flush(&b.BestSnapshot().Hash, mode, onConnect) + if err != nil { + return err + } + } + + return nil +} + // Config is a descriptor which specifies the blockchain instance configuration. type Config struct { // DB defines the database which houses the blocks and will be used to diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index f5f6e3af..fae3278f 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -130,6 +130,7 @@ func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain, return err } idx.utreexoState = uState + idx.lastFlushTime = time.Now() // Nothing to do if the node is not pruned. // diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index d43f66fb..d8443e11 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -265,6 +265,9 @@ func (idx *UtreexoProofIndex) Flush(bestHash *chainhash.Hash, mode blockchain.Fl if err != nil { return err } + + // Set the last flush time as now as the flush was successful. + idx.lastFlushTime = time.Now() return nil } @@ -329,6 +332,9 @@ func (idx *FlatUtreexoProofIndex) Flush(bestHash *chainhash.Hash, mode blockchai if err != nil { return err } + + // Set the last flush time as now as the flush was successful. 
+ idx.lastFlushTime = time.Now() return nil } diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index b48df680..a1c7ac7f 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -91,6 +91,7 @@ func (idx *UtreexoProofIndex) Init(chain *blockchain.BlockChain, return err } idx.utreexoState = uState + idx.lastFlushTime = time.Now() // Nothing else to do if the node is an archive node. if !idx.config.Pruned { diff --git a/netsync/manager.go b/netsync/manager.go index 5c988669..b7e84863 100644 --- a/netsync/manager.go +++ b/netsync/manager.go @@ -837,14 +837,19 @@ func (sm *SyncManager) handleBlockMsg(bmsg *blockMsg) { // flush the blockchain cache because we don't expect new blocks immediately. // After that, there is nothing more to do. if !sm.headersFirstMode { + // Flush relevant indexes. + if err := sm.chain.FlushIndexes(blockchain.FlushPeriodic, true); err != nil { + log.Errorf("Error while flushing the blockchain cache: %v", err) + } // Only flush if utreexoView is not active since a utreexo node does // not have a utxo cache. 
if !sm.chain.IsUtreexoViewActive() { if err := sm.chain.FlushUtxoCache(blockchain.FlushPeriodic); err != nil { log.Errorf("Error while flushing the blockchain cache: %v", err) } - return } + + return } // This is headers-first mode, so if the block is not a checkpoint From 1729643f79ff03fd561a95a13b230af429f4933e Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 19 Jul 2024 13:53:18 +0900 Subject: [PATCH 43/50] indexers: change magicbytes to array from a slice --- blockchain/indexers/flatfile.go | 8 ++++---- blockchain/indexers/flatfile_test.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/blockchain/indexers/flatfile.go b/blockchain/indexers/flatfile.go index fe806f55..b3468f4d 100644 --- a/blockchain/indexers/flatfile.go +++ b/blockchain/indexers/flatfile.go @@ -29,7 +29,7 @@ const ( var ( // magicBytes are the bytes prepended to any entry in the dataFiles. - magicBytes = []byte{0xaa, 0xff, 0xaa, 0xff} + magicBytes = [4]byte{0xaa, 0xff, 0xaa, 0xff} ) // FlatFileState is the shared state for storing flatfiles. It is specifically designed @@ -179,7 +179,7 @@ func (ff *FlatFileState) StoreData(height int32, data []byte) error { buf = buf[:len(data)+8] // Add the magic bytes, size, and the data to the buffer to be written. - copy(buf[:4], magicBytes) + copy(buf[:4], magicBytes[:]) binary.BigEndian.PutUint32(buf[4:8], uint32(len(data))) copy(buf[8:], data) @@ -225,7 +225,7 @@ func (ff *FlatFileState) FetchData(height int32) ([]byte, error) { } // Sanity check. If wrong magic was read, then error out. - if !bytes.Equal(buf[:4], magicBytes) { + if !bytes.Equal(buf[:4], magicBytes[:]) { return nil, fmt.Errorf("Read wrong magic bytes. 
Expect %x but got %x", magicBytes, buf[:4]) } @@ -266,7 +266,7 @@ func (ff *FlatFileState) DisconnectBlock(height int32) error { return err } - if !bytes.Equal(buf[:4], magicBytes) { + if !bytes.Equal(buf[:4], magicBytes[:]) { return fmt.Errorf("read wrong magic of %x", buf[:4]) } diff --git a/blockchain/indexers/flatfile_test.go b/blockchain/indexers/flatfile_test.go index 00d40640..d2093bcc 100644 --- a/blockchain/indexers/flatfile_test.go +++ b/blockchain/indexers/flatfile_test.go @@ -327,7 +327,7 @@ func getAfterSizes(ff *FlatFileState, height int32) (int64, int64, error) { if err != nil { return 0, 0, err } - if !bytes.Equal(buf[:4], magicBytes) { + if !bytes.Equal(buf[:4], magicBytes[:]) { return 0, 0, fmt.Errorf("read wrong magic of %x", buf[:4]) } dataSize := binary.BigEndian.Uint32(buf[4:]) From c02bfa2f2c214a7cfe1fd7feb9c2afe964aba8e1 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 19 Jul 2024 13:58:25 +0900 Subject: [PATCH 44/50] indexers: add recovery methods on flatfile state The flatfile states were not recoverable if the node crashed suddenly while data was being written to it. These recovery methods recover the flatfile state to the latest readable data on restarts. --- blockchain/indexers/flatfile.go | 85 +++++++++++- blockchain/indexers/flatfile_test.go | 197 +++++++++++++++++++++++++++ 2 files changed, 278 insertions(+), 4 deletions(-) diff --git a/blockchain/indexers/flatfile.go b/blockchain/indexers/flatfile.go index b3468f4d..3e84aaa0 100644 --- a/blockchain/indexers/flatfile.go +++ b/blockchain/indexers/flatfile.go @@ -59,6 +59,72 @@ type FlatFileState struct { offsets []int64 } +// recoverOffsetFile recovers the offset file to the latest readable offset. +func (ff *FlatFileState) recoverOffsetFile(fileSize int64) error { + offsetFileSize := (fileSize / 8) * 8 + return ff.offsetFile.Truncate(offsetFileSize) +} + +// recover recovers the flat file state to a readable state by rolling back to the latest +// reable stored data.
+func (ff *FlatFileState) recover() error { + log.Infof("Recovering flatfile as it's not consistent") + buf := make([]byte, 8) + for ; ff.currentHeight > 0; ff.currentHeight-- { + // Read from the dataFile. This read will grab the magic bytes and the + // size bytes. + offset := ff.offsets[ff.currentHeight] + + _, err := ff.dataFile.ReadAt(buf, offset) + if err == nil && bytes.Equal(buf[:4], magicBytes[:]) { + // Size of the actual data we want to fetch. + size := binary.BigEndian.Uint32(buf[4:]) + + // Read the data. + dataBuf := make([]byte, size) + read, err := ff.dataFile.ReadAt(dataBuf, offset+8) + if err == nil && uint32(read) == size { + _, err := ff.FetchData(ff.currentHeight) + if err == nil { + // If we're able to read the data bytes, then return here. + return nil + } + } + } + + // Truncating when the offset is bigger will append 0s. + // Only truncate when the offset is less than the data file size. + dataFileSize, err := ff.dataFile.Seek(0, 2) + if err != nil { + return err + } + if offset < dataFileSize { + err = ff.dataFile.Truncate(offset) + if err != nil { + return err + } + } + + offsetFileSize, err := ff.offsetFile.Seek(0, 2) + if err != nil { + return err + } + // Each offset is 8 bytes. + err = ff.offsetFile.Truncate(offsetFileSize - 8) + if err != nil { + return err + } + + // Set the currentOffset as the last offset. + ff.currentOffset = ff.offsets[len(ff.offsets)-1] + + // Pop the offset in memory. + ff.offsets = ff.offsets[:len(ff.offsets)-1] + } + + return nil +} + // Init initializes the FlatFileState. If resuming, it loads the offsets onto memory. // If starting new, it creates an offsetFile and a dataFile along with the directories // those belong in. @@ -90,8 +156,12 @@ func (ff *FlatFileState) Init(path, dataName string) error { // Offsets are always 8 bytes each. if offsetFileSize%8 != 0 { - return fmt.Errorf("FlatFileState.Init(): Corrupt FlatFileState. 
" + - "offsetFile not mulitple of 8 bytes") + log.Infof("Recovering flatfile offsets as it's not consistent") + // recover the offsetfile if it's not in 8 byte increments. + err = ff.recoverOffsetFile(offsetFileSize) + if err != nil { + return err + } } // If the file size is bigger than 0, we're resuming and will read all @@ -131,11 +201,18 @@ func (ff *FlatFileState) Init(path, dataName string) error { return err } - // Oo the same with the in-ram slice. + // Do the same with the in-ram slice. ff.offsets = make([]int64, 1) } - return nil + // Test if we can fetch the last stored data. + _, err = ff.FetchData(ff.currentHeight) + if err == nil { + return nil + } + + // If we can't fetch the last stored data, recover to the last readable data. + return ff.recover() } // StoreData stores the given byte slice as a new entry in the dataFile. diff --git a/blockchain/indexers/flatfile_test.go b/blockchain/indexers/flatfile_test.go index d2093bcc..720cfe59 100644 --- a/blockchain/indexers/flatfile_test.go +++ b/blockchain/indexers/flatfile_test.go @@ -649,3 +649,200 @@ func TestMultipleFetchData(t *testing.T) { wg.Wait() } + +func TestRecover(t *testing.T) { + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + + tests := []struct { + name string + datas [][]byte + truncateLen int64 + truncateOffsetFile bool + }{ + { + name: "first", + datas: func() [][]byte { + datas := make([][]byte, 0, 100) + for i := int32(1); i <= 100; i++ { + data, err := createRandByteSlice(rnd) + if err != nil { + t.Fatal(err) + } + datas = append(datas, data) + } + + return datas + }(), + truncateLen: 1, + truncateOffsetFile: true, + }, + { + name: "second", + datas: func() [][]byte { + datas := make([][]byte, 0, 100) + for i := int32(1); i <= 100; i++ { + data, err := createRandByteSlice(rnd) + if err != nil { + t.Fatal(err) + } + datas = append(datas, data) + } + + return datas + }(), + truncateLen: 7, + truncateOffsetFile: true, + }, + { + name: "third", + datas: func() [][]byte { + datas 
:= make([][]byte, 0, 100) + for i := int32(1); i <= 100; i++ { + data, err := createRandByteSlice(rnd) + if err != nil { + t.Fatal(err) + } + datas = append(datas, data) + } + + return datas + }(), + truncateLen: 5, + truncateOffsetFile: true, + }, + { + name: "fourth", + datas: func() [][]byte { + datas := make([][]byte, 0, 100) + for i := int32(1); i <= 100; i++ { + data, err := createRandByteSlice(rnd) + if err != nil { + t.Fatal(err) + } + datas = append(datas, data) + } + + return datas + }(), + truncateLen: 1, + truncateOffsetFile: false, + }, + { + name: "fifth", + datas: func() [][]byte { + datas := make([][]byte, 0, 100) + for i := int32(1); i <= 100; i++ { + data, err := createRandByteSlice(rnd) + if err != nil { + t.Fatal(err) + } + datas = append(datas, data) + } + + return datas + }(), + truncateLen: 15, + truncateOffsetFile: false, + }, + { + name: "sixth", + datas: func() [][]byte { + datas := make([][]byte, 0, 100) + for i := int32(1); i <= 100; i++ { + data, err := createRandByteSlice(rnd) + if err != nil { + t.Fatal(err) + } + datas = append(datas, data) + } + + return datas + }(), + truncateLen: 155, + truncateOffsetFile: false, + }, + } + + for _, test := range tests { + tmpDir := t.TempDir() + dir := filepath.Join(tmpDir, "dir_"+test.name) + defer deleteFlatFile(dir) + + // Create and store data in the flat file state to test it. + ff := NewFlatFileState() + err := ff.Init(dir, test.name) + if err != nil { + t.Fatal(err) + } + for i, data := range test.datas { + err = ff.StoreData(int32(i)+1, data) + if err != nil { + t.Fatal(err) + } + } + + // Sanity checking. + for i, data := range test.datas { + fetched, err := ff.FetchData(int32(i) + 1) + if err != nil { + t.Fatal(err) + } + + if !bytes.Equal(fetched, data) { + t.Fatalf("test %v. for height %v, expected %v, got %v", + test.name, + i, + hex.EncodeToString(test.datas[i-1]), + hex.EncodeToString(fetched)) + } + } + + // Corrupt the flat file state. 
+ if test.truncateOffsetFile { + offsetFileSize, err := ff.offsetFile.Seek(0, 2) + if err != nil { + t.Fatal(err) + } + err = ff.offsetFile.Truncate(offsetFileSize - test.truncateLen) + if err != nil { + t.Fatal(err) + } + } else { + dataFileSize, err := ff.dataFile.Seek(0, 2) + if err != nil { + t.Fatal(err) + } + err = ff.dataFile.Truncate(dataFileSize - test.truncateLen) + if err != nil { + t.Fatal(err) + } + + // Test if we can fetch the last stored data. + _, err = ff.FetchData(ff.currentHeight) + if err == nil { + t.Fatalf("test %v. expected error", test.name) + } + } + + // Calling init here calls the recovery functions. + err = ff.Init(dir, test.name) + if err != nil { + t.Fatal(err) + } + + // Check that the data til the currentHeight is correct. + for i := int32(1); i <= ff.currentHeight; i++ { + fetched, err := ff.FetchData(i) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(fetched, test.datas[i-1]) { + t.Fatalf("test %v. for height %v, expected %v, got %v", + test.name, + i, + hex.EncodeToString(test.datas[i-1]), + hex.EncodeToString(fetched)) + } + } + } +} From 46123ae38de4741f5d39bc1d8b82b9115768f405 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Fri, 19 Jul 2024 14:02:44 +0900 Subject: [PATCH 45/50] indexers: disconnect blocks from flatfile state if it's beyond the index tip The index tip may be behind what's saved in the flatfile state as the main database has a cache but the flatfile states do not. After an unexpected crash, on recovery the flatfile states are now disconnected to match the index tip height to keep the indexer consistent.
--- blockchain/indexers/flatfile.go | 5 ++ blockchain/indexers/flatutreexoproofindex.go | 74 ++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/blockchain/indexers/flatfile.go b/blockchain/indexers/flatfile.go index 3e84aaa0..562d7541 100644 --- a/blockchain/indexers/flatfile.go +++ b/blockchain/indexers/flatfile.go @@ -382,6 +382,11 @@ func (ff *FlatFileState) DisconnectBlock(height int32) error { return nil } +// BestHeight returns the current latest height of the flat file state. +func (ff *FlatFileState) BestHeight() int32 { + return ff.currentHeight +} + // deleteFileFile removes the flat file state directory and all the contents // in it. func deleteFlatFile(path string) error { diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index fae3278f..bbf4da63 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -117,6 +117,75 @@ func (idx *FlatUtreexoProofIndex) NeedsInputs() bool { return true } +// consistentFlatFileState rolls back all the flat file states to the tip height. +// The data is written to the flat files directly but the index tips are cached and +// then written to disk. This may lead to states where the index tip is lower than the +// data stored in the flat files. Rolling back the flat file state to the index tip +// keep ths entire indexer consistent. 
+func (idx *FlatUtreexoProofIndex) consistentFlatFileState(tipHeight int32) error { + if !idx.config.Pruned { + if idx.proofState.BestHeight() != 0 && + tipHeight < idx.proofState.BestHeight() { + bestHeight := idx.proofState.BestHeight() + for tipHeight != bestHeight && bestHeight > 0 { + err := idx.proofState.DisconnectBlock(bestHeight) + if err != nil { + return err + } + bestHeight-- + } + } + } + + if idx.undoState.BestHeight() != 0 && + tipHeight < idx.undoState.BestHeight() { + bestHeight := idx.undoState.BestHeight() + for tipHeight != bestHeight && bestHeight > 0 { + err := idx.undoState.DisconnectBlock(bestHeight) + if err != nil { + return err + } + bestHeight-- + } + } + + if idx.rememberIdxState.BestHeight() != 0 && + tipHeight < idx.rememberIdxState.BestHeight() { + bestHeight := idx.rememberIdxState.BestHeight() + for tipHeight != bestHeight && bestHeight > 0 { + err := idx.rememberIdxState.DisconnectBlock(bestHeight) + if err != nil { + return err + } + bestHeight-- + } + } + if idx.proofStatsState.BestHeight() != 0 && + tipHeight < idx.proofStatsState.BestHeight() { + bestHeight := idx.proofStatsState.BestHeight() + for tipHeight != bestHeight && bestHeight > 0 { + err := idx.proofStatsState.DisconnectBlock(bestHeight) + if err != nil { + return err + } + bestHeight-- + } + } + if idx.rootsState.BestHeight() != 0 && + tipHeight < idx.rootsState.BestHeight() { + bestHeight := idx.rootsState.BestHeight() + for tipHeight != bestHeight && bestHeight > 0 { + err := idx.rootsState.DisconnectBlock(bestHeight) + if err != nil { + return err + } + bestHeight-- + } + } + + return nil +} + // Init initializes the flat utreexo proof index. This is part of the Indexer // interface. 
func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain, @@ -132,6 +201,11 @@ func (idx *FlatUtreexoProofIndex) Init(chain *blockchain.BlockChain, idx.utreexoState = uState idx.lastFlushTime = time.Now() + err = idx.consistentFlatFileState(tipHeight) + if err != nil { + return err + } + // Nothing to do if the node is not pruned. // // If the node is pruned, then we need to check if it started off as From a3b68d4f19fb1e54e7bf6dc367f4289d993c16ad Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 23 Jul 2024 14:36:21 +0900 Subject: [PATCH 46/50] indexers: flush utreexo state on prune For the utreexo state to be recoverable on unexpected crashes, there must be blocks available for it to reindex on crashes. If there aren't, the utreexo state is irrecoverable. To prevent this from happening, we check what the last stored block on the disk is after a prune and flush the utreexo state if the last flush happened before the last kept block. --- blockchain/indexers/addrindex.go | 2 +- blockchain/indexers/cfindex.go | 2 +- blockchain/indexers/common.go | 2 +- blockchain/indexers/flatutreexoproofindex.go | 25 ++++++++++++++++++-- blockchain/indexers/manager.go | 2 +- blockchain/indexers/ttlindex.go | 2 +- blockchain/indexers/txindex.go | 2 +- blockchain/indexers/utreexoproofindex.go | 25 ++++++++++++++++++-- 8 files changed, 52 insertions(+), 10 deletions(-) diff --git a/blockchain/indexers/addrindex.go b/blockchain/indexers/addrindex.go index 1fa678d5..cfc2a3ad 100644 --- a/blockchain/indexers/addrindex.go +++ b/blockchain/indexers/addrindex.go @@ -816,7 +816,7 @@ func (idx *AddrIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.Block, // supported with pruning. // // This is part of the Indexer interface. 
-func (idx *AddrIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) error { +func (idx *AddrIndex) PruneBlock(_ database.Tx, _ *chainhash.Hash, _ int32) error { return nil } diff --git a/blockchain/indexers/cfindex.go b/blockchain/indexers/cfindex.go index 6ab2f093..8b5c7db0 100644 --- a/blockchain/indexers/cfindex.go +++ b/blockchain/indexers/cfindex.go @@ -261,7 +261,7 @@ func (idx *CfIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.Block, // reindexing as a pruned node. // // This is part of the Indexer interface. -func (idx *CfIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) error { +func (idx *CfIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash, _ int32) error { for _, key := range cfIndexKeys { err := dbDeleteFilterIdxEntry(dbTx, key, blockHash) if err != nil { diff --git a/blockchain/indexers/common.go b/blockchain/indexers/common.go index 6c3d0302..dc0ac97e 100644 --- a/blockchain/indexers/common.go +++ b/blockchain/indexers/common.go @@ -65,7 +65,7 @@ type Indexer interface { // PruneBlock is invoked when an older block is deleted after it's been // processed. - PruneBlock(database.Tx, *chainhash.Hash) error + PruneBlock(dbTx database.Tx, deletedBlock *chainhash.Hash, lastKeptHeight int32) error // Flush flushes the index. Flush(*chainhash.Hash, blockchain.FlushMode, bool) error diff --git a/blockchain/indexers/flatutreexoproofindex.go b/blockchain/indexers/flatutreexoproofindex.go index bbf4da63..d27a088b 100644 --- a/blockchain/indexers/flatutreexoproofindex.go +++ b/blockchain/indexers/flatutreexoproofindex.go @@ -888,8 +888,29 @@ func (idx *FlatUtreexoProofIndex) DisconnectBlock(dbTx database.Tx, block *btcut // processed. // // This is part of the Indexer interface. 
-func (idx *FlatUtreexoProofIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) error { - return nil +func (idx *FlatUtreexoProofIndex) PruneBlock(_ database.Tx, _ *chainhash.Hash, lastKeptHeight int32) error { + hash, _, err := dbFetchUtreexoStateConsistency(idx.utreexoState.utreexoStateDB) + if err != nil { + return err + } + + // It's ok to call block by hash here as the utreexo state consistency hash is always + // included in the best chain. + lastFlushHeight, err := idx.chain.BlockHeightByHash(hash) + if err != nil { + return err + } + + // If the last flushed utreexo state is at or above the last kept block, + // we can sync up to the tip so a flush is not required. + if lastKeptHeight <= lastFlushHeight { + return nil + } + + // It's ok to fetch the best snapshot here as the block called on pruneblock has not + // yet been connected on the utreexo state. So this is indeed the correct hash. + bestHash := idx.chain.BestSnapshot().Hash + return idx.Flush(&bestHash, blockchain.FlushRequired, true) } // FetchUtreexoProof returns the Utreexo proof data for the given block height. diff --git a/blockchain/indexers/manager.go b/blockchain/indexers/manager.go index b1fbfcd7..828624d7 100644 --- a/blockchain/indexers/manager.go +++ b/blockchain/indexers/manager.go @@ -652,7 +652,7 @@ func (m *Manager) PruneBlocks(dbTx database.Tx, lastKeptHeight int32, } // Notify the indexer with the connected block so it can prune it. - err = index.PruneBlock(dbTx, blockHash) + err = index.PruneBlock(dbTx, blockHash, lastKeptHeight) if err != nil { return err } diff --git a/blockchain/indexers/ttlindex.go b/blockchain/indexers/ttlindex.go index 7b3fc46f..edfa9793 100644 --- a/blockchain/indexers/ttlindex.go +++ b/blockchain/indexers/ttlindex.go @@ -97,7 +97,7 @@ func (idx *TTLIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.Block, // supported with pruning. // // This is part of the Indexer interface.
-func (idx *TTLIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) error { +func (idx *TTLIndex) PruneBlock(_ database.Tx, _ *chainhash.Hash, _ int32) error { return nil } diff --git a/blockchain/indexers/txindex.go b/blockchain/indexers/txindex.go index 1c8f3846..45c0bd84 100644 --- a/blockchain/indexers/txindex.go +++ b/blockchain/indexers/txindex.go @@ -437,7 +437,7 @@ func (idx *TxIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.Block, // supported with pruning. // // This is part of the Indexer interface. -func (idx *TxIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) error { +func (idx *TxIndex) PruneBlock(_ database.Tx, _ *chainhash.Hash, _ int32) error { return nil } diff --git a/blockchain/indexers/utreexoproofindex.go b/blockchain/indexers/utreexoproofindex.go index a1c7ac7f..b599d9d0 100644 --- a/blockchain/indexers/utreexoproofindex.go +++ b/blockchain/indexers/utreexoproofindex.go @@ -575,8 +575,29 @@ func (idx *UtreexoProofIndex) VerifyAccProof(toProve []utreexo.Hash, // processed. // // This is part of the Indexer interface. -func (idx *UtreexoProofIndex) PruneBlock(dbTx database.Tx, blockHash *chainhash.Hash) error { - return nil +func (idx *UtreexoProofIndex) PruneBlock(_ database.Tx, _ *chainhash.Hash, lastKeptHeight int32) error { + hash, _, err := dbFetchUtreexoStateConsistency(idx.utreexoState.utreexoStateDB) + if err != nil { + return err + } + + // It's ok to call block by hash here as the utreexo state consistency hash is always + // included in the best chain. + lastFlushHeight, err := idx.chain.BlockHeightByHash(hash) + if err != nil { + return err + } + + // If the last flushed utreexo state is at or above the last kept block, + // we can sync up to the tip so a flush is not required. + if lastKeptHeight <= lastFlushHeight { + return nil + } + + // It's ok to fetch the best snapshot here as the block called on pruneblock has not + // yet been connected on the utreexo state.
So this is indeed the correct hash. + bestHash := idx.chain.BestSnapshot().Hash + return idx.Flush(&bestHash, blockchain.FlushRequired, true) } // NewUtreexoProofIndex returns a new instance of an indexer that is used to create a utreexo From 6e1ad1d476906a2803a80e628c305741aa976774 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 26 Aug 2024 15:35:58 +0900 Subject: [PATCH 47/50] utreexobackends: add CachedPosition When mapping to a uint64, it wasn't possible to mark if the positions were fresh or not. If a position could be marked fresh, then we can delete it completely from the memory without it having to touch the disk. This change comes from observing that there are a lot of slowdowns coming from flushing. --- .../utreexobackends/cachedleavesmap.go | 47 ++++++++++++----- .../utreexobackends/cachedleavesmap_test.go | 8 +-- blockchain/utreexoio.go | 50 +++++++++++-------- 3 files changed, 66 insertions(+), 39 deletions(-) diff --git a/blockchain/internal/utreexobackends/cachedleavesmap.go b/blockchain/internal/utreexobackends/cachedleavesmap.go index a8064e1a..88684bc9 100644 --- a/blockchain/internal/utreexobackends/cachedleavesmap.go +++ b/blockchain/internal/utreexobackends/cachedleavesmap.go @@ -13,6 +13,27 @@ const ( cachedLeavesMapBucketSize = 16 + sizehelper.Uint64Size*chainhash.HashSize + sizehelper.Uint64Size*sizehelper.Uint64Size ) +// CachedPosition has the position and a flag for the status in the cache. +type CachedPosition struct { + Position uint64 + Flags CachedFlag +} + +// IsFresh returns if the cached Position has never been in the database. +func (c *CachedPosition) IsFresh() bool { + return c.Flags&Fresh == Fresh +} + +// IsModified returns if the cached position has been in the database and was modified in the cache. +func (c *CachedPosition) IsModified() bool { + return c.Flags&Modified == Modified +} + +// IsRemoved returns if the key for this cached position has been removed.
+func (c *CachedPosition) IsRemoved() bool { + return c.Flags&Removed == Removed +} + // CachedLeavesMapSlice is a slice of maps for utxo entries. The slice of maps are needed to // guarantee that the map will only take up N amount of bytes. As of v1.20, the // go runtime will allocate 2^N + few extra buckets, meaning that for large N, we'll @@ -24,10 +45,10 @@ type CachedLeavesMapSlice struct { mtx *sync.Mutex // maps are the underlying maps in the slice of maps. - maps []map[utreexo.Hash]uint64 + maps []map[utreexo.Hash]CachedPosition // overflow puts the overflowed entries. - overflow map[utreexo.Hash]uint64 + overflow map[utreexo.Hash]CachedPosition // maxEntries is the maximum amount of elemnts that the map is allocated for. maxEntries []int @@ -58,11 +79,11 @@ func (ms *CachedLeavesMapSlice) Length() int { // the entry. nil and false is returned if the outpoint is not found. // // This function is safe for concurrent access. -func (ms *CachedLeavesMapSlice) Get(k utreexo.Hash) (uint64, bool) { +func (ms *CachedLeavesMapSlice) Get(k utreexo.Hash) (CachedPosition, bool) { ms.mtx.Lock() defer ms.mtx.Unlock() - var v uint64 + var v CachedPosition var found bool for _, m := range ms.maps { @@ -79,7 +100,7 @@ func (ms *CachedLeavesMapSlice) Get(k utreexo.Hash) (uint64, bool) { } } - return 0, false + return CachedPosition{}, false } // Put puts the keys and the values into one of the maps in the map slice. If the @@ -87,7 +108,7 @@ func (ms *CachedLeavesMapSlice) Get(k utreexo.Hash) (uint64, bool) { // return false. // // This function is safe for concurrent access. 
-func (ms *CachedLeavesMapSlice) Put(k utreexo.Hash, v uint64) bool { +func (ms *CachedLeavesMapSlice) Put(k utreexo.Hash, v CachedPosition) bool { ms.mtx.Lock() defer ms.mtx.Unlock() @@ -151,12 +172,12 @@ func (ms *CachedLeavesMapSlice) DeleteMaps() { ms.mtx.Lock() defer ms.mtx.Unlock() - ms.maps = make([]map[utreexo.Hash]uint64, len(ms.maxEntries)) + ms.maps = make([]map[utreexo.Hash]CachedPosition, len(ms.maxEntries)) for i := range ms.maxEntries { - ms.maps[i] = make(map[utreexo.Hash]uint64, ms.maxEntries[i]) + ms.maps[i] = make(map[utreexo.Hash]CachedPosition, ms.maxEntries[i]) } - ms.overflow = make(map[utreexo.Hash]uint64) + ms.overflow = make(map[utreexo.Hash]CachedPosition) } // ClearMaps clears all maps @@ -176,7 +197,7 @@ func (ms *CachedLeavesMapSlice) ClearMaps() { // ForEach loops through all the elements in the cachedleaves map slice and calls fn with the key-value pairs. // // This function is safe for concurrent access. -func (ms *CachedLeavesMapSlice) ForEach(fn func(utreexo.Hash, uint64) error) error { +func (ms *CachedLeavesMapSlice) ForEach(fn func(utreexo.Hash, CachedPosition) error) error { ms.mtx.Lock() defer ms.mtx.Unlock() @@ -222,12 +243,12 @@ func (ms *CachedLeavesMapSlice) createMaps(maxMemoryUsage int64) int64 { } // Create the maps. 
- ms.maps = make([]map[utreexo.Hash]uint64, len(ms.maxEntries)) + ms.maps = make([]map[utreexo.Hash]CachedPosition, len(ms.maxEntries)) for i := range ms.maxEntries { - ms.maps[i] = make(map[utreexo.Hash]uint64, ms.maxEntries[i]) + ms.maps[i] = make(map[utreexo.Hash]CachedPosition, ms.maxEntries[i]) } - ms.overflow = make(map[utreexo.Hash]uint64) + ms.overflow = make(map[utreexo.Hash]CachedPosition) return int64(totalElemCount) } diff --git a/blockchain/internal/utreexobackends/cachedleavesmap_test.go b/blockchain/internal/utreexobackends/cachedleavesmap_test.go index f4fb8c11..72715c7a 100644 --- a/blockchain/internal/utreexobackends/cachedleavesmap_test.go +++ b/blockchain/internal/utreexobackends/cachedleavesmap_test.go @@ -35,7 +35,7 @@ func TestCachedLeaveMapSliceDuplicates(t *testing.T) { m, maxElems := NewCachedLeavesMapSlice(8000) for i := 0; i < 10; i++ { for j := int64(0); j < maxElems; j++ { - if !m.Put(uint64ToHash(uint64(j)), 0) { + if !m.Put(uint64ToHash(uint64(j)), CachedPosition{}) { t.Fatalf("unexpected error on m.put") } } @@ -49,7 +49,7 @@ func TestCachedLeaveMapSliceDuplicates(t *testing.T) { // Try inserting x which should be unique. Should fail as the map is full. x := uint64(0) x -= 1 - if m.Put(uint64ToHash(x), 0) { + if m.Put(uint64ToHash(x), CachedPosition{}) { t.Fatalf("expected error but successfully called put") } @@ -57,7 +57,7 @@ func TestCachedLeaveMapSliceDuplicates(t *testing.T) { // a duplicate element. m.Delete(uint64ToHash(0)) x = uint64(maxElems) - 1 - if !m.Put(uint64ToHash(x), 0) { + if !m.Put(uint64ToHash(x), CachedPosition{}) { t.Fatalf("unexpected failure on put") } @@ -68,7 +68,7 @@ func TestCachedLeaveMapSliceDuplicates(t *testing.T) { } // Put 0 back in and then compare the map. 
- if !m.Put(uint64ToHash(0), 0) { + if !m.Put(uint64ToHash(0), CachedPosition{}) { t.Fatalf("didn't expect error but unsuccessfully called put") } if m.Length()-len(m.overflow) != int(maxElems) { diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 68e7eb67..6116fecb 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -6,7 +6,6 @@ package blockchain import ( "fmt" - "math" "github.com/syndtr/goleveldb/leveldb" "github.com/utreexo/utreexo" @@ -315,11 +314,11 @@ func (m *CachedLeavesBackEnd) Get(k utreexo.Hash) (uint64, bool) { } // Even if the entry was found, if the position value is math.MaxUint64, // then it was already deleted. - if pos == math.MaxUint64 { + if pos.IsRemoved() { return 0, false } - return pos, found + return pos.Position, found } // CachedLeavesBackendPut puts a key-value pair in the given leveldb tx. @@ -333,26 +332,36 @@ func CachedLeavesBackendPut(tx *leveldb.Transaction, k utreexo.Hash, v uint64) e // Put puts the given data to the underlying cache. If the cache is full, it evicts // the earliest entries to make room. func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { - m.cache.Put(k, v) + m.cache.Put(k, utreexobackends.CachedPosition{ + Position: v, + Flags: utreexobackends.Fresh, + }) } // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { - m.cache.Put(k, math.MaxUint64) + pos, found := m.cache.Get(k) + if found && pos.IsFresh() { + m.cache.Delete(k) + return + } + p := utreexobackends.CachedPosition{ + Position: pos.Position, + Flags: pos.Flags | utreexobackends.Removed, + } + + m.cache.Put(k, p) } // Length returns the amount of items in the underlying db and the cache. 
func (m *CachedLeavesBackEnd) Length() int { length := 0 - m.cache.ForEach(func(k utreexo.Hash, v uint64) error { + m.cache.ForEach(func(k utreexo.Hash, v utreexobackends.CachedPosition) error { // Only operate on the entry if it's not removed and it's not already // in the database. - if v != math.MaxUint64 { - _, found := m.dbGet(k) - if !found { - length++ - } + if !v.IsRemoved() && v.IsFresh() { + length++ } return nil }) @@ -365,7 +374,7 @@ func (m *CachedLeavesBackEnd) Length() int { } k := iter.Key() val, found := m.cache.Get(*(*[chainhash.HashSize]byte)(k)) - if found && val == math.MaxUint64 { + if found && val.IsRemoved() { // Skip if the key-value pair has already been removed in the cache. continue } @@ -379,14 +388,11 @@ func (m *CachedLeavesBackEnd) Length() int { // ForEach calls the given function for each of the elements in the underlying map. func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error { - m.cache.ForEach(func(k utreexo.Hash, v uint64) error { + m.cache.ForEach(func(k utreexo.Hash, v utreexobackends.CachedPosition) error { // Only operate on the entry if it's not removed and it's not already // in the database. - if v != math.MaxUint64 { - _, found := m.dbGet(k) - if !found { - fn(k, v) - } + if !v.IsRemoved() && v.IsFresh() { + fn(k, v.Position) } return nil }) @@ -401,7 +407,7 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error // only valid until the next call to Next. k := iter.Key() val, found := m.cache.Get(*(*[chainhash.HashSize]byte)(k)) - if found && val == math.MaxUint64 { + if found && val.IsRemoved() { // Skip if the key-value pair has already been removed in the cache. continue } @@ -428,14 +434,14 @@ func (m *CachedLeavesBackEnd) UsageStats() (int64, int64) { // Flush resets the cache and saves all the key values onto the database. 
func (m *CachedLeavesBackEnd) Flush(ldbTx *leveldb.Transaction) error { - err := m.cache.ForEach(func(k utreexo.Hash, v uint64) error { - if v == math.MaxUint64 { + err := m.cache.ForEach(func(k utreexo.Hash, v utreexobackends.CachedPosition) error { + if v.IsRemoved() { err := ldbTx.Delete(k[:], nil) if err != nil { return err } } else { - err := CachedLeavesBackendPut(ldbTx, k, v) + err := CachedLeavesBackendPut(ldbTx, k, v.Position) if err != nil { return err } From 934f6cb58a49c6868183c6ef956b202d6be183fa Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 26 Aug 2024 15:51:03 +0900 Subject: [PATCH 48/50] blockchain: remove unnecessary put --- blockchain/utreexoio.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index 6116fecb..d149720b 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -94,8 +94,6 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { return utreexo.Leaf{}, false } - m.cache.Put(k, cLeaf) - // If we found it, return here. return cLeaf.Leaf, true } From 9f74eaf5957878cbbab8fd5e34125160d42288af Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Mon, 26 Aug 2024 15:52:31 +0900 Subject: [PATCH 49/50] blockchain: change nodesbackend deletion For NodesBackend, the cached entry is not removed from the cache even if it's being deleted as subsequent calls would be made to fetch the key. Caching it as removed saves on disk reads. On flushes, the leaf is not attempted to be flushed if it's marked as fresh. --- blockchain/utreexoio.go | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go index d149720b..9fa87447 100644 --- a/blockchain/utreexoio.go +++ b/blockchain/utreexoio.go @@ -155,6 +155,9 @@ func NodesBackendDelete(tx *leveldb.Transaction, k uint64) error { // Delete removes the given key from the underlying map. No-op if the key // doesn't exist.
func (m *NodesBackEnd) Delete(k uint64) { + // Don't delete as the same key may get called to be removed multiple times. + // Cache it as removed so that we don't call expensive flushes on keys that + // are not in the database. leaf, _ := m.cache.Get(k) l := utreexobackends.CachedLeaf{ Leaf: leaf.Leaf, @@ -254,15 +257,24 @@ func (m *NodesBackEnd) UsageStats() (int64, int64) { // flush saves all the cached entries to disk and resets the cache map. func (m *NodesBackEnd) Flush(ldbTx *leveldb.Transaction) error { err := m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) error { - if v.IsRemoved() { - err := NodesBackendDelete(ldbTx, k) - if err != nil { - return err + if v.IsFresh() { + if !v.IsRemoved() { + err := NodesBackendPut(ldbTx, k, v.Leaf) + if err != nil { + return err + } } - } else if v.IsFresh() || v.IsModified() { - err := NodesBackendPut(ldbTx, k, v.Leaf) - if err != nil { - return err + } else { + if v.IsRemoved() { + err := NodesBackendDelete(ldbTx, k) + if err != nil { + return err + } + } else if v.IsModified() { + err := NodesBackendPut(ldbTx, k, v.Leaf) + if err != nil { + return err + } } } From 154620955a5211794d71a0c4318b42d59da0086d Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Wed, 28 Aug 2024 18:07:59 +0900 Subject: [PATCH 50/50] indexers, utreexobackends: fix flush bugs --- blockchain/indexers/utreexobackend.go | 2 +- blockchain/internal/utreexobackends/cachedleavesmap.go | 4 ++++ blockchain/internal/utreexobackends/nodesmap.go | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/blockchain/indexers/utreexobackend.go b/blockchain/indexers/utreexobackend.go index d8443e11..d521e1c1 100644 --- a/blockchain/indexers/utreexobackend.go +++ b/blockchain/indexers/utreexobackend.go @@ -729,7 +729,7 @@ func InitUtreexoState(cfg *UtreexoConfig, chain *blockchain.BlockChain, isFlushNeeded = func() bool { nodesNeedsFlush := nodesDB.IsFlushNeeded() leavesNeedsFlush := cachedLeavesDB.IsFlushNeeded() - return nodesNeedsFlush 
&& leavesNeedsFlush + return nodesNeedsFlush || leavesNeedsFlush } } else { log.Infof("loading the utreexo state from disk...") diff --git a/blockchain/internal/utreexobackends/cachedleavesmap.go b/blockchain/internal/utreexobackends/cachedleavesmap.go index 88684bc9..912ae579 100644 --- a/blockchain/internal/utreexobackends/cachedleavesmap.go +++ b/blockchain/internal/utreexobackends/cachedleavesmap.go @@ -192,6 +192,10 @@ func (ms *CachedLeavesMapSlice) ClearMaps() { delete(ms.maps[i], key) } } + + for key := range ms.overflow { + delete(ms.overflow, key) + } } // ForEach loops through all the elements in the cachedleaves map slice and calls fn with the key-value pairs. diff --git a/blockchain/internal/utreexobackends/nodesmap.go b/blockchain/internal/utreexobackends/nodesmap.go index a5f16bda..5b5909ef 100644 --- a/blockchain/internal/utreexobackends/nodesmap.go +++ b/blockchain/internal/utreexobackends/nodesmap.go @@ -208,6 +208,10 @@ func (ms *NodesMapSlice) ClearMaps() { delete(ms.maps[i], key) } } + + for key := range ms.overflow { + delete(ms.overflow, key) + } } // ForEach loops through all the elements in the nodes map slice and calls fn with the key-value pairs.