Skip to content

Commit

Permalink
Don't require container to be down in dcc.Down
Browse files Browse the repository at this point in the history
The check can be racy, let the caller do whatever checks it wants to do.
daemon.go already has a check when it decides if dcc.Down is needed or not
Now we also add a check to RestartServiceWithHaltHeight before it calls Down,
with an appropriate error message.
  • Loading branch information
sin3point14 authored and mkaczanowski committed Dec 17, 2024
1 parent 6c74975 commit de06dac
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 11 deletions.
4 changes: 4 additions & 0 deletions internal/pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,10 @@ func (d *Daemon) performUpgrade(
return errors.Wrapf(err, "failed to check if service is running")
}

// This check is prone to race conditions, the image could be up at this point
// but exits before dcc.Down is called. However, at this point we are certain
// that upgrade height has been hit, so, it should be safe to Down an exited
// container.
if isRunning {
logger.Info("Executing compose down").Notifyf(ctx, "Shutting down chain to perform upgrade. Current image: %s, new image: %s", currImage, newImage)
if err = d.dcc.Down(ctx, serviceName, compose.DownTimeout); err != nil {
Expand Down
27 changes: 16 additions & 11 deletions internal/pkg/docker/compose.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,26 +182,18 @@ func (dcc *ComposeClient) upgradeImageInComposeFile(ctx context.Context, service
}

func (dcc *ComposeClient) Down(ctx context.Context, serviceName string, timeout time.Duration) error {
isImageContainerRunning, err := dcc.IsServiceRunning(ctx, serviceName, timeout)
if err != nil {
return errors.Wrapf(err, "check for container running failed")
}
if !isImageContainerRunning {
return errors.Wrapf(ErrContainerNotRunning, "expected the container to run before calling docker compose down")
}

// 5 seconds buffer to handle the case when docker timeout (-t)
// takes slightly longer than defined timeout (thus delay context cancellation)
deadline := timeout + 5*time.Second
timeoutSeconds := int(math.Round(timeout.Seconds()))

err = cmd.ExecuteWithDeadlineAndLog(ctx, deadline, []string{}, "docker", "compose", "-f", dcc.composeFile, "down", "--remove-orphans", "-t", strconv.Itoa(timeoutSeconds))
err := cmd.ExecuteWithDeadlineAndLog(ctx, deadline, []string{}, "docker", "compose", "-f", dcc.composeFile, "down", "--remove-orphans", "-t", strconv.Itoa(timeoutSeconds))
if err != nil {
return errors.Wrapf(err, "docker compose down failed")
}

// verify from docker api that the container is down
isImageContainerRunning, err = dcc.IsServiceRunning(ctx, serviceName, timeout)
isImageContainerRunning, err := dcc.IsServiceRunning(ctx, serviceName, timeout)
if err != nil {
return errors.Wrapf(err, "check for container running failed")
}
Expand Down Expand Up @@ -243,7 +235,20 @@ func (dcc *ComposeClient) Up(ctx context.Context, serviceName string, timeout ti
}

func (dcc *ComposeClient) RestartServiceWithHaltHeight(ctx context.Context, composeConfig *config.ComposeCli, serviceName string, upgradeHeight int64) error {
err := dcc.Down(ctx, serviceName, composeConfig.DownTimeout)
isImageContainerRunning, err := dcc.IsServiceRunning(ctx, serviceName, composeConfig.DownTimeout)
if err != nil {
return errors.Wrapf(err, "check for container running failed")
}
if !isImageContainerRunning {
return errors.Wrapf(ErrContainerNotRunning, "expected the container to run before restarting with halt height")
}
// The check above is prone to race conditions and the
// container can exit after the check. That should be super rare
// and it should be safe to Down it anyways, since, we are sure that we want
// to register halt height and restart.
// If the container crashed due to some issue, let it crash again after the restart
// blazar can't do anything about that
err = dcc.Down(ctx, serviceName, composeConfig.DownTimeout)
if err != nil && !errors.Is(err, ErrContainerNotRunning) {
return errors.Wrapf(err, "docker compose down failed")
}
Expand Down

0 comments on commit de06dac

Please sign in to comment.