Skip to content

Commit

Permalink
fix: prevent concurrent hermes operations on the same chain
Browse files Browse the repository at this point in the history
Allowing them can result in "hermes create ..." commands failing because
there's sequence numbers being modified by simultaneous commands.
  • Loading branch information
fastfadingviolets committed Aug 19, 2024
1 parent be814e8 commit 0291f95
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ jobs:
# run tests
- name: run unit tests
# -short flag purposefully omitted because there are some longer unit tests
run: go test -race -timeout 10m -failfast -p 2 $(go list ./... | grep -v /cmd | grep -v /examples)
run: go test -race -timeout 30m -failfast -p 2 $(go list ./... | grep -v /cmd | grep -v /examples)
91 changes: 91 additions & 0 deletions interchain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"testing"
"time"

"cosmossdk.io/math"
"github.com/cosmos/cosmos-sdk/codec"
Expand Down Expand Up @@ -255,6 +256,96 @@ func TestCosmosChain_BroadcastTx_HermesRelayer(t *testing.T) {
broadcastTxCosmosChainTest(t, ibc.Hermes)
}

func TestInterchain_ConcurrentRelayerOps(t *testing.T) {
type relayerTest struct {
relayer ibc.RelayerImplementation
name string
}

const (
denom = "uatom"
chains = 4
)

relayers := []relayerTest{
{
relayer: ibc.CosmosRly,
name: "Cosmos Relayer",
},
{
relayer: ibc.Hermes,
name: "Hermes",
},
}

numFullNodes := 0
numValidators := 1

for _, rly := range relayers {
rly := rly
t.Run(rly.name, func(t *testing.T) {
client, network := interchaintest.DockerSetup(t)
f, err := interchaintest.CreateLogFile(fmt.Sprintf("%d.json", time.Now().Unix()))
require.NoError(t, err)
// Reporter/logs
rep := testreporter.NewReporter(f)
eRep := rep.RelayerExecReporter(t)
ctx := context.Background()

chainSpecs := make([]*interchaintest.ChainSpec, chains)
for i := 0; i < chains; i++ {
chainSpecs[i] = &interchaintest.ChainSpec{
Name: "gaia",
ChainName: fmt.Sprintf("g%d", i+1),
Version: "v7.0.1",
NumValidators: &numValidators,
NumFullNodes: &numFullNodes,
ChainConfig: ibc.ChainConfig{
GasPrices: "0" + denom,
Denom: denom,
},
}
}
r := interchaintest.NewBuiltinRelayerFactory(rly.relayer, zaptest.NewLogger(t)).Build(
t, client, network,
)

cf := interchaintest.NewBuiltinChainFactory(zaptest.NewLogger(t), chainSpecs)
chains, err := cf.Chains(t.Name())
require.NoError(t, err)
ic := interchaintest.NewInterchain()
for _, chain := range chains {
require.NoError(t, err)
ic.AddChain(chain)
}
ic.AddRelayer(r, "relayer")
for i, chainI := range chains {
for j := i + 1; j < len(chains); j++ {
ic.AddLink(interchaintest.InterchainLink{
Chain1: chainI,
Chain2: chains[j],
Relayer: r,
Path: getIBCPath(chainI, chains[j]),
})
}
}
err = ic.Build(ctx, eRep, interchaintest.InterchainBuildOptions{
TestName: t.Name(),
Client: client,
NetworkID: network,
})
require.NoError(t, err)
t.Cleanup(func() {
ic.Close()
})
})
}
}

func getIBCPath(chainA, chainB ibc.Chain) string {
return chainA.Config().ChainID + "-" + chainB.Config().ChainID
}

func broadcastTxCosmosChainTest(t *testing.T, relayerImpl ibc.RelayerImplementation) {
if testing.Short() {
t.Skip("skipping in short mode")
Expand Down
78 changes: 70 additions & 8 deletions relayer/hermes/hermes_relayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"regexp"
"strings"
"sync"
"time"

"github.com/docker/docker/client"
Expand Down Expand Up @@ -35,8 +36,12 @@ var (
// Relayer is the ibc.Relayer implementation for hermes.
type Relayer struct {
*relayer.DockerRelayer

// lock protects the relayer's state
lock sync.RWMutex
paths map[string]*pathConfiguration
chainConfigs []ChainConfig
chainLocks map[string]*sync.Mutex
}

// ChainConfig holds all values required to write an entry in the "chains" section in the hermes config file.
Expand Down Expand Up @@ -72,6 +77,7 @@ func NewHermesRelayer(log *zap.Logger, testName string, cli *client.Client, netw

return &Relayer{
DockerRelayer: dr,
chainLocks: map[string]*sync.Mutex{},
}
}

Expand All @@ -87,13 +93,21 @@ func (r *Relayer) AddChainConfiguration(ctx context.Context, rep ibc.RelayerExec
return fmt.Errorf("failed to write hermes config: %w", err)
}

return r.validateConfig(ctx, rep)
if err := r.validateConfig(ctx, rep); err != nil {
return err
}
r.lock.Lock()
defer r.lock.Unlock()
r.chainLocks[chainConfig.ChainID] = &sync.Mutex{}
return nil
}

// LinkPath performs the operations that happen when a path is linked. This includes creating clients, creating connections
// and establishing a channel. This happens across multiple operations rather than a single link path cli command.
func (r *Relayer) LinkPath(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, channelOpts ibc.CreateChannelOptions, clientOpts ibc.CreateClientOptions) error {
r.lock.RLock()
_, ok := r.paths[pathName]
r.lock.RUnlock()
if !ok {
return fmt.Errorf("path %s not found", pathName)
}
Expand All @@ -114,7 +128,12 @@ func (r *Relayer) LinkPath(ctx context.Context, rep ibc.RelayerExecReporter, pat
}

func (r *Relayer) CreateChannel(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, opts ibc.CreateChannelOptions) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

cmd := []string{hermes, "--json", "create", "channel", "--order", opts.Order.String(), "--a-chain", pathConfig.chainA.chainID, "--a-port", opts.SourcePortName, "--b-port", opts.DestPortName, "--a-connection", pathConfig.chainA.connectionID}
if opts.Version != "" {
cmd = append(cmd, "--channel-version", opts.Version)
Expand All @@ -129,7 +148,12 @@ func (r *Relayer) CreateChannel(ctx context.Context, rep ibc.RelayerExecReporter
}

func (r *Relayer) CreateConnections(ctx context.Context, rep ibc.RelayerExecReporter, pathName string) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

cmd := []string{hermes, "--json", "create", "connection", "--a-chain", pathConfig.chainA.chainID, "--a-client", pathConfig.chainA.clientID, "--b-client", pathConfig.chainB.clientID}

res := r.Exec(ctx, rep, cmd, nil)
Expand All @@ -147,10 +171,12 @@ func (r *Relayer) CreateConnections(ctx context.Context, rep ibc.RelayerExecRepo
}

func (r *Relayer) UpdateClients(ctx context.Context, rep ibc.RelayerExecReporter, pathName string) error {
pathConfig, ok := r.paths[pathName]
if !ok {
return fmt.Errorf("path %s not found", pathName)
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

updateChainACmd := []string{hermes, "--json", "update", "client", "--host-chain", pathConfig.chainA.chainID, "--client", pathConfig.chainA.clientID}
res := r.Exec(ctx, rep, updateChainACmd, nil)
if res.Err != nil {
Expand All @@ -164,7 +190,12 @@ func (r *Relayer) UpdateClients(ctx context.Context, rep ibc.RelayerExecReporter
// Note: in the go relayer this can be done with a single command using the path reference,
// however in Hermes this needs to be done as two separate commands.
func (r *Relayer) CreateClients(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, opts ibc.CreateClientOptions) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

chainACreateClientCmd := []string{hermes, "--json", "create", "client", "--host-chain", pathConfig.chainA.chainID, "--reference-chain", pathConfig.chainB.chainID}
if opts.TrustingPeriod != "" {
chainACreateClientCmd = append(chainACreateClientCmd, "--trusting-period", opts.TrustingPeriod)
Expand Down Expand Up @@ -205,7 +236,11 @@ func (r *Relayer) CreateClients(ctx context.Context, rep ibc.RelayerExecReporter
}

func (r *Relayer) CreateClient(ctx context.Context, rep ibc.RelayerExecReporter, srcChainID, dstChainID, pathName string, opts ibc.CreateClientOptions) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

createClientCmd := []string{hermes, "--json", "create", "client", "--host-chain", srcChainID, "--reference-chain", dstChainID}
if opts.TrustingPeriod != "" {
Expand Down Expand Up @@ -262,6 +297,8 @@ func (r *Relayer) RestoreKey(ctx context.Context, rep ibc.RelayerExecReporter, c
}

func (r *Relayer) UpdatePath(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, opts ibc.PathUpdateOptions) error {
r.lock.Lock()
defer r.lock.Unlock()
// the concept of paths doesn't exist in hermes, but update our in-memory paths so we can use them elsewhere
path, ok := r.paths[pathName]
if !ok {
Expand Down Expand Up @@ -289,6 +326,7 @@ func (r *Relayer) UpdatePath(ctx context.Context, rep ibc.RelayerExecReporter, p
}

func (r *Relayer) Flush(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, channelID string) error {
r.lock.RLock()
path := r.paths[pathName]
channels, err := r.GetChannels(ctx, rep, path.chainA.chainID)
if err != nil {
Expand All @@ -304,6 +342,7 @@ func (r *Relayer) Flush(ctx context.Context, rep ibc.RelayerExecReporter, pathNa
if portID == "" {
return fmt.Errorf("channel %s not found on chain %s", channelID, path.chainA.chainID)
}
r.lock.RUnlock()
cmd := []string{hermes, "clear", "packets", "--chain", path.chainA.chainID, "--channel", channelID, "--port", portID}
res := r.Exec(ctx, rep, cmd, nil)
return res.Err
Expand All @@ -312,6 +351,8 @@ func (r *Relayer) Flush(ctx context.Context, rep ibc.RelayerExecReporter, pathNa
// GeneratePath establishes an in memory path representation. The concept does not exist in hermes, so it is handled
// at the interchain test level.
func (r *Relayer) GeneratePath(ctx context.Context, rep ibc.RelayerExecReporter, srcChainID, dstChainID, pathName string) error {
r.lock.Lock()
defer r.lock.Unlock()
if r.paths == nil {
r.paths = map[string]*pathConfiguration{}
}
Expand All @@ -330,6 +371,8 @@ func (r *Relayer) GeneratePath(ctx context.Context, rep ibc.RelayerExecReporter,
// rather than multiple config files, we need to maintain a list of chain configs each time they are added to write the
// full correct file update calling Relayer.AddChainConfiguration.
func (r *Relayer) configContent(cfg ibc.ChainConfig, keyName, rpcAddr, grpcAddr string) ([]byte, error) {
r.lock.Lock()
defer r.lock.Unlock()
r.chainConfigs = append(r.chainConfigs, ChainConfig{
cfg: cfg,
keyName: keyName,
Expand Down Expand Up @@ -367,6 +410,25 @@ func extractJsonResult(stdout []byte) []byte {
return []byte(jsonOutput)
}

func (r *Relayer) getAndLockPath(pathName string) (*pathConfiguration, func(), error) {
// we don't get an RLock here because we could deadlock while trying to get the chain locks
r.lock.Lock()
path, ok := r.paths[pathName]
defer r.lock.Unlock()
if !ok {
return nil, nil, fmt.Errorf("path %s not found", pathName)
}
chainALock := r.chainLocks[path.chainA.chainID]
chainBLock := r.chainLocks[path.chainB.chainID]
chainALock.Lock()
chainBLock.Lock()
unlock := func() {
chainALock.Unlock()
chainBLock.Unlock()
}
return path, unlock, nil
}

// GetClientIdFromStdout extracts the client ID from stdout.
func GetClientIdFromStdout(stdout []byte) (string, error) {
var clientCreationResult ClientCreationResponse
Expand Down

0 comments on commit 0291f95

Please sign in to comment.