From 456636a08883263367eec4e1dffec9cd9e2df4ba Mon Sep 17 00:00:00 2001
From: Andrea Mazzotti
Date: Fri, 11 Oct 2024 13:08:13 +0200
Subject: [PATCH] Implement elemental-register upgrade

Signed-off-by: Andrea Mazzotti
---
 cmd/register/main.go                   |   1 +
 cmd/register/upgrade.go                | 138 ++++++++++++++++++++++++
 pkg/elementalcli/elementalcli.go       | 123 ++++++++++++++++++++-
 pkg/elementalcli/mocks/elementalcli.go |  30 ++++++
 pkg/install/install.go                 | 156 +++++++++++++++++++++++++
 pkg/install/mocks/install.go           |  16 +++
 6 files changed, 463 insertions(+), 1 deletion(-)
 create mode 100644 cmd/register/upgrade.go

diff --git a/cmd/register/main.go b/cmd/register/main.go
index c184f507..86ac2c4c 100644
--- a/cmd/register/main.go
+++ b/cmd/register/main.go
@@ -74,6 +74,7 @@ func main() {
 	cmd.AddCommand(
 		newVersionCommand(),
 		newDumpDataCommand(),
+		newUpgradeCommand(),
 	)
 	if err := cmd.Execute(); err != nil {
 		log.Fatalf("FATAL: %s", err)
diff --git a/cmd/register/upgrade.go b/cmd/register/upgrade.go
new file mode 100644
index 00000000..d5aa33b9
--- /dev/null
+++ b/cmd/register/upgrade.go
@@ -0,0 +1,138 @@
+/*
+Copyright © 2022 - 2024 SUSE LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+
+	"github.com/rancher/elemental-operator/pkg/elementalcli"
+	"github.com/rancher/elemental-operator/pkg/install"
+	"github.com/rancher/elemental-operator/pkg/log"
+	"github.com/spf13/cobra"
+	"github.com/twpayne/go-vfs"
+)
+
+// Errors returned by the upgrade command when the upgrade could not be completed in this run.
+var (
+	ErrRebooting           = errors.New("Machine needs reboot after upgrade")
+	ErrAlreadyShuttingDown = errors.New("System is already shutting down")
+)
+
+// newUpgradeCommand returns the "upgrade" subcommand, which applies an upgrade through
+// install.Installer.UpgradeElemental and reboots the machine when the installer asks for it.
+func newUpgradeCommand() *cobra.Command {
+	var hostDir string
+	var cloudConfigPath string
+	var recovery bool
+	var recoveryOnly bool
+	var debug bool
+	var system string
+	var correlationID string
+
+	cmd := &cobra.Command{
+		Use:   "upgrade",
+		Short: "Upgrades the machine",
+		RunE: func(_ *cobra.Command, _ []string) error {
+			// If the system is shutting down, return an error so we can try again on next reboot.
+			alreadyShuttingDown, err := isSystemShuttingDown()
+			if err != nil {
+				return fmt.Errorf("determining if system is running: %w", err)
+			}
+			if alreadyShuttingDown {
+				return ErrAlreadyShuttingDown
+			}
+
+			// If system is not shutting down we can proceed.
+			upgradeConfig := elementalcli.UpgradeConfig{
+				Debug:        debug,
+				Recovery:     recovery,
+				RecoveryOnly: recoveryOnly,
+				System:       system,
+				Bootloader:   true,
+			}
+			upgradeContext := install.UpgradeContext{
+				Config:          upgradeConfig,
+				HostDir:         hostDir,
+				CloudConfigPath: cloudConfigPath,
+				CorrelationID:   correlationID,
+			}
+
+			log.Infof("Upgrade context: %+v", upgradeContext)
+
+			installer := install.NewInstaller(vfs.OSFS, nil, nil)
+
+			needsReboot, err := installer.UpgradeElemental(upgradeContext)
+			// If the upgrade could not be applied or verified,
+			// then this command will fail but the machine will not reboot.
+			if err != nil {
+				return fmt.Errorf("upgrading machine: %w", err)
+			}
+			// If the machine needs a reboot after an upgrade has been applied, reboot it and return an error,
+			// so that consumers can try again after reboot to validate the upgrade has been applied successfully.
+			if needsReboot {
+				log.Infof("Rebooting machine after %s upgrade", correlationID)
+				reboot()
+				return ErrRebooting
+			}
+			// Upgrade has been applied successfully, nothing to do.
+			log.Infof("Upgrade %s applied successfully", correlationID)
+			return nil
+		},
+	}
+
+	cmd.Flags().StringVar(&hostDir, "host-dir", "/host", "The machine root directory where to apply the upgrade")
+	cmd.Flags().StringVar(&cloudConfigPath, "cloud-config", "/run/data/cloud-config", "The path of a cloud-config file to install on the machine during upgrade")
+	cmd.Flags().StringVar(&system, "system", "dir:/", "The system image uri or filesystem location to upgrade to")
+	cmd.Flags().StringVar(&correlationID, "correlation-id", "", "A correlationID to label the upgrade snapshot with")
+	cmd.Flags().BoolVar(&recovery, "recovery", false, "Upgrades the recovery partition together with the system")
+	cmd.Flags().BoolVar(&recoveryOnly, "recovery-only", false, "Upgrades the recovery partition only")
+	cmd.Flags().BoolVar(&debug, "debug", true, "Prints debug logs when performing upgrade")
+	return cmd
+}
+
+// isSystemShuttingDown reports whether "systemctl is-system-running" prints "stopping".
+// The command is run through nsenter in the IPC and mount namespaces of PID 1.
+func isSystemShuttingDown() (bool, error) {
+	cmd := exec.Command("nsenter", "-i", "-m", "-t", "1", "--", "systemctl", "is-system-running")
+	cmd.Stdin = os.Stdin
+	cmd.Stderr = os.Stderr
+	output, err := cmd.Output()
+	state := strings.TrimSpace(string(output))
+	// systemctl exits non-zero for every state other than "running" (including "stopping"),
+	// so a non-zero exit is only treated as a failure when no state was printed.
+	var exitErr *exec.ExitError
+	if err != nil && (!errors.As(err, &exitErr) || state == "") {
+		return false, fmt.Errorf("running: systemctl is-system-running: %w", err)
+	}
+	return state == "stopping", nil
+}
+
+// reboot runs the reboot command through nsenter in the IPC and mount namespaces of PID 1.
+// A failure to run the command is logged and not returned.
+func reboot() {
+	cmd := exec.Command("nsenter", "-i", "-m", "-t", "1", "--", "reboot")
+	cmd.Stdin = os.Stdin
+	cmd.Stderr = os.Stderr
+	cmd.Stdout = os.Stdout
+	if err := cmd.Run(); err != nil {
+		log.Errorf("Could not reboot: %s", err)
+	}
+}
diff --git a/pkg/elementalcli/elementalcli.go b/pkg/elementalcli/elementalcli.go
index ceaafc40..22066a3b 100644
--- a/pkg/elementalcli/elementalcli.go
+++ b/pkg/elementalcli/elementalcli.go
@@ -22,16 +22,51 @@ import (
 	"os/exec"
+	"sort"
 	"strconv"
 	"strings"
+	"time"
 
 	elementalv1 "github.com/rancher/elemental-operator/api/v1beta1"
 	"github.com/rancher/elemental-operator/pkg/log"
+	"gopkg.in/yaml.v3"
 )
 
-const TempCloudInitDir = "/tmp/elemental/cloud-init"
+const (
+	TempCloudInitDir   = "/tmp/elemental/cloud-init"
+	UpgradeLockFile    = "/run/elemental/upgrade.lock"
+	UpgradeLockTimeout = 60 * time.Second
+)
+
+// UpgradeConfig holds the options that Runner.Upgrade translates into elemental flags and environment variables.
+type UpgradeConfig struct {
+	Debug          bool
+	Recovery       bool
+	RecoveryOnly   bool
+	System         string
+	Bootloader     bool
+	SnapshotLabels map[string]string
+}
+
+// State is the part of the "elemental state" YAML output that Runner.GetState decodes.
+type State struct {
+	StatePartition PartitionState `yaml:"state,omitempty"`
+}
+
+// PartitionState holds the snapshots decoded from the "state" section, keyed by their integer key.
+type PartitionState struct {
+	Snapshots map[int]*Snapshot `yaml:"snapshots,omitempty"`
+}
+
+// Snapshot holds the "active" flag and the labels of a single snapshot.
+type Snapshot struct {
+	Active bool              `yaml:"active,omitempty"`
+	Labels map[string]string `yaml:"labels,omitempty"`
+}
 
 type Runner interface {
 	Install(elementalv1.Install) error
 	Reset(elementalv1.Reset) error
+	Upgrade(UpgradeConfig) error
+	GetState() (State, error)
 }
 
 func NewRunner() Runner {
@@ -86,6 +121,62 @@ func (r *runner) Reset(conf elementalv1.Reset) error {
 	return cmd.Run()
 }
 
+// Upgrade runs "elemental upgrade", or "elemental upgrade-recovery" when conf.RecoveryOnly is set.
+// The options that are not root flags are passed as ELEMENTAL_UPGRADE_* environment variables.
+func (r *runner) Upgrade(conf UpgradeConfig) error {
+	installerOpts := []string{"elemental"}
+	// There are no env var bindings in elemental-cli for elemental root options
+	// so root flags should be passed within the command line
+	if conf.Debug {
+		installerOpts = append(installerOpts, "--debug")
+	}
+
+	// Actual subcommand
+	if conf.RecoveryOnly {
+		installerOpts = append(installerOpts, "upgrade-recovery")
+	} else {
+		installerOpts = append(installerOpts, "upgrade")
+	}
+
+	if conf.Bootloader {
+		installerOpts = append(installerOpts, "--bootloader")
+	}
+
+	cmd := exec.Command("elemental")
+	environmentVariables := mapToUpgradeEnv(conf)
+	cmd.Env = append(os.Environ(), environmentVariables...)
+	cmd.Stdout = os.Stdout
+	cmd.Args = installerOpts
+	cmd.Stdin = os.Stdin
+	cmd.Stderr = os.Stderr
+	log.Debugf("running: %s\n with ENV:\n%s", strings.Join(installerOpts, " "), strings.Join(environmentVariables, "\n"))
+	return cmd.Run()
+}
+
+// GetState runs "elemental state" and decodes its YAML output into a State.
+func (r *runner) GetState() (State, error) {
+	state := State{}
+
+	log.Debug("Getting elemental state")
+	installerOpts := []string{"elemental", "state"}
+	cmd := exec.Command("elemental")
+	cmd.Args = installerOpts
+	cmd.Stdin = os.Stdin
+	cmd.Stderr = os.Stderr
+	log.Debugf("running: %s", strings.Join(installerOpts, " "))
+
+	var commandOutput []byte
+	var err error
+	if commandOutput, err = cmd.Output(); err != nil {
+		return state, fmt.Errorf("running elemental state: %w", err)
+	}
+	if err := yaml.Unmarshal(commandOutput, &state); err != nil {
+		return state, fmt.Errorf("unmarshalling elemental state: %w", err)
+	}
+
+	return state, nil
+}
+
 func mapToInstallEnv(conf elementalv1.Install) []string {
 	var variables []string
 	// See GetInstallKeyEnvMap() in https://github.com/rancher/elemental-toolkit/blob/main/pkg/constants/constants.go
@@ -120,6 +211,36 @@ func mapToResetEnv(conf elementalv1.Reset) []string {
 	return variables
 }
 
+// mapToUpgradeEnv translates conf into the ELEMENTAL_UPGRADE_* environment variables used by Runner.Upgrade.
+func mapToUpgradeEnv(conf UpgradeConfig) []string {
+	var variables []string
+	// See GetUpgradeKeyEnvMap() in https://github.com/rancher/elemental-toolkit/blob/main/pkg/constants/constants.go
+	variables = append(variables, formatEV("ELEMENTAL_UPGRADE_RECOVERY", strconv.FormatBool(conf.Recovery)))
+	variables = append(variables, formatEV("ELEMENTAL_UPGRADE_SNAPSHOT_LABELS", formatSnapshotLabels(conf.SnapshotLabels)))
+	if conf.RecoveryOnly {
+		variables = append(variables, formatEV("ELEMENTAL_UPGRADE_RECOVERY_SYSTEM", conf.System))
+	} else {
+		variables = append(variables, formatEV("ELEMENTAL_UPGRADE_SYSTEM", conf.System))
+	}
+	return variables
+}
+
 func formatEV(key string, value string) string {
 	return fmt.Sprintf("%s=%s", key, value)
 }
+
+// formatSnapshotLabels renders labels as a comma separated list of key=value pairs.
+// Keys are sorted so that the result does not depend on map iteration order.
+func formatSnapshotLabels(labels map[string]string) string {
+	keys := make([]string, 0, len(labels))
+	for key := range labels {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+
+	formattedLabels := make([]string, 0, len(keys))
+	for _, key := range keys {
+		formattedLabels = append(formattedLabels, formatEV(key, labels[key]))
+	}
+	return strings.Join(formattedLabels, ",")
+}
diff --git a/pkg/elementalcli/mocks/elementalcli.go b/pkg/elementalcli/mocks/elementalcli.go
index 0b40aa25..90283f57 100644
--- a/pkg/elementalcli/mocks/elementalcli.go
+++ b/pkg/elementalcli/mocks/elementalcli.go
@@ -31,6 +31,7 @@ import (
 	reflect "reflect"
 
 	v1beta1 "github.com/rancher/elemental-operator/api/v1beta1"
+	elementalcli "github.com/rancher/elemental-operator/pkg/elementalcli"
 	gomock "go.uber.org/mock/gomock"
 )
 
@@ -57,6 +58,21 @@ func (m *MockRunner) EXPECT() *MockRunnerMockRecorder {
 	return m.recorder
 }
 
+// GetState mocks base method.
+func (m *MockRunner) GetState() (elementalcli.State, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetState")
+	ret0, _ := ret[0].(elementalcli.State)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetState indicates an expected call of GetState.
+func (mr *MockRunnerMockRecorder) GetState() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetState", reflect.TypeOf((*MockRunner)(nil).GetState))
+}
+
 // Install mocks base method.
 func (m *MockRunner) Install(arg0 v1beta1.Install) error {
 	m.ctrl.T.Helper()
@@ -84,3 +100,17 @@ func (mr *MockRunnerMockRecorder) Reset(arg0 any) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Reset", reflect.TypeOf((*MockRunner)(nil).Reset), arg0)
 }
+
+// Upgrade mocks base method.
+func (m *MockRunner) Upgrade(arg0 elementalcli.UpgradeConfig) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Upgrade", arg0)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// Upgrade indicates an expected call of Upgrade.
+func (mr *MockRunnerMockRecorder) Upgrade(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Upgrade", reflect.TypeOf((*MockRunner)(nil).Upgrade), arg0)
+}
diff --git a/pkg/install/install.go b/pkg/install/install.go
index e154b3b1..54a81ea2 100644
--- a/pkg/install/install.go
+++ b/pkg/install/install.go
@@ -17,11 +17,14 @@ limitations under the License.
 package install
 
 import (
+	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
+	"os/exec"
 	"path/filepath"
+	"strings"
 
 	"github.com/jaypipes/ghw"
 	"github.com/jaypipes/ghw/pkg/block"
@@ -66,13 +69,30 @@ const (
 	// OEM is mounted on different paths depending if we are resetting (from recovery) or installing (from live media)
 	installOEMMount = "/run/elemental/oem"
 	resetOEMMount   = "/oem"
+
+	// Upgrade constants
+	upgradeCloudConfigPath = "/oem/90_operator.yaml"
+	correlationIDLabelKey  = "correlationID"
+)
+
+var (
+	upgradeMounts = []string{"/dev", "/run"}
 )
 
+// UpgradeContext groups the inputs of Installer.UpgradeElemental.
+type UpgradeContext struct {
+	Config          elementalcli.UpgradeConfig
+	HostDir         string
+	CloudConfigPath string
+	CorrelationID   string
+}
+
 type Installer interface {
 	ResetElemental(config elementalv1.Config, state register.State, networkConfig elementalv1.NetworkConfig) error
 	ResetElementalNetwork() error
 	InstallElemental(config elementalv1.Config, state register.State, networkConfig elementalv1.NetworkConfig) error
 	WriteLocalSystemAgentConfig(config elementalv1.Elemental) error
+	UpgradeElemental(context UpgradeContext) (bool, error)
 }
 
 func NewInstaller(fs vfs.FS, disks []*block.Disk, networkConfigurator network.Configurator) Installer {
@@ -584,3 +604,139 @@ func (i *installer) cleanupResetPlan() error {
 	}
 	return i.fs.Remove(controllers.LocalResetPlanPath)
 }
+
+// UpgradeElemental applies the upgrade described by context, unless a snapshot labeled with
+// context.CorrelationID is already present. It returns true when an upgrade was applied and
+// the machine needs a reboot.
+func (i *installer) UpgradeElemental(context UpgradeContext) (bool, error) {
+	// Without a correlation ID an applied upgrade can not be recognized on the next run.
+	if context.CorrelationID == "" {
+		return false, errors.New("upgrade correlation ID must not be empty")
+	}
+
+	log.Infof("Applying upgrade: %s", context.CorrelationID)
+
+	runner := elementalcli.NewRunner()
+
+	if err := i.applyCloudConfig(context.HostDir, context.CloudConfigPath); err != nil {
+		return false, fmt.Errorf("applying upgrade cloud config: %w", err)
+	}
+
+	elementalState, err := runner.GetState()
+	if err != nil {
+		return false, fmt.Errorf("reading installation state: %w", err)
+	}
+
+	if i.isCorrelationIDFound(elementalState, context.CorrelationID) {
+		log.Infof("Upgrade '%s' successfully applied", context.CorrelationID)
+		return false, nil
+	}
+
+	// Label the snapshot created by this upgrade with the correlation ID, otherwise
+	// isCorrelationIDFound can never match it and the upgrade is applied again at every run.
+	// The labels are copied so that the caller's map is left untouched.
+	upgradeConfig := context.Config
+	upgradeConfig.SnapshotLabels = make(map[string]string, len(context.Config.SnapshotLabels)+1)
+	for key, value := range context.Config.SnapshotLabels {
+		upgradeConfig.SnapshotLabels[key] = value
+	}
+	upgradeConfig.SnapshotLabels[correlationIDLabelKey] = context.CorrelationID
+
+	log.Infof("Applying upgrade %s", context.CorrelationID)
+	if err := i.mountDirs(upgradeMounts, context.HostDir); err != nil {
+		return false, fmt.Errorf("mounting host directories: %w", err)
+	}
+	if err := runner.Upgrade(upgradeConfig); err != nil {
+		return false, fmt.Errorf("applying upgrade '%s': %w", context.CorrelationID, err)
+	}
+	return true, nil
+}
+
+// applyCloudConfig copies the file at cloudConfigPath to upgradeCloudConfigPath under hostDir.
+// When cloudConfigPath does not exist, a previously applied config is removed instead.
+func (i *installer) applyCloudConfig(hostDir string, cloudConfigPath string) error {
+	hostCloudConfigPath := filepath.Join(hostDir, upgradeCloudConfigPath)
+
+	cloudConfigBytes, err := os.ReadFile(cloudConfigPath)
+	if os.IsNotExist(err) {
+		log.Infof("Upgrade cloud config '%s' is missing. Removing previously applied config in '%s', if any.", cloudConfigPath, hostCloudConfigPath)
+		if err := os.Remove(hostCloudConfigPath); err != nil && !os.IsNotExist(err) {
+			return fmt.Errorf("removing file '%s': %w", hostCloudConfigPath, err)
+		}
+		return nil
+	}
+	if err != nil {
+		return fmt.Errorf("reading file '%s': %w", cloudConfigPath, err)
+	}
+
+	// A missing host file reads as empty content, so it is written unless the new config is empty too.
+	hostCloudConfigBytes, err := os.ReadFile(hostCloudConfigPath)
+	if err != nil && !os.IsNotExist(err) {
+		return fmt.Errorf("reading file '%s': %w", hostCloudConfigPath, err)
+	}
+	if bytes.Equal(hostCloudConfigBytes, cloudConfigBytes) {
+		return nil
+	}
+
+	log.Infof("Applying upgrade cloud config to: %s", hostCloudConfigPath)
+	if err := os.WriteFile(hostCloudConfigPath, cloudConfigBytes, os.ModePerm); err != nil {
+		return fmt.Errorf("writing file '%s': %w", hostCloudConfigPath, err)
+	}
+	return nil
+}
+
+// isCorrelationIDFound reports whether a snapshot labeled with correlationID exists in elementalState,
+// no matter if that snapshot is the active one.
+func (i *installer) isCorrelationIDFound(elementalState elementalcli.State, correlationID string) bool {
+	// This is normally not supposed to happen, as we expect at least the first snapshot to be present after install.
+	// However we can still try to upgrade in this case, hoping the upgrade snapshot will be created after that.
+	if elementalState.StatePartition.Snapshots == nil {
+		log.Info("Could not find correlationID in empty snapshots list")
+		return false
+	}
+
+	for _, snapshot := range elementalState.StatePartition.Snapshots {
+		// Snapshots holds pointers: skip empty entries instead of dereferencing them.
+		if snapshot == nil {
+			continue
+		}
+		// A snapshot without the label must not match, so check for its presence explicitly.
+		if label, found := snapshot.Labels[correlationIDLabelKey]; !found || label != correlationID {
+			continue
+		}
+		// If the upgrade was already applied, but somehow the system was reverted to a different snapshot,
+		// do not apply the upgrade again. This will prevent a cascade loop effect, for example when the
+		// revert is automatically applied by the boot assessment mechanism.
+		if !snapshot.Active {
+			log.Infof("CorrelationID %s found on a passive snapshot. Not upgrading again.", correlationID)
+		}
+		return true
+	}
+
+	log.Infof("Could not find snapshot with correlationID %s", correlationID)
+	return false
+}
+
+// mountDirs runs "mount --rbind" for each entry of mounts, from its path under hostDir onto the entry itself.
+// Nothing is mounted when hostDir is "/".
+func (i *installer) mountDirs(mounts []string, hostDir string) error {
+	if hostDir == "/" {
+		return nil
+	}
+
+	for _, mount := range mounts {
+		hostMount := filepath.Join(hostDir, mount)
+		// The arguments are passed to exec.Command: assigning them to cmd.Args directly would
+		// make "--rbind" argv[0], which mount does not parse as an option.
+		cmd := exec.Command("mount", "--rbind", hostMount, mount)
+		cmd.Stdout = os.Stdout
+		cmd.Stdin = os.Stdin
+		cmd.Stderr = os.Stderr
+		log.Debugf("running: %s", strings.Join(cmd.Args, " "))
+		if err := cmd.Run(); err != nil {
+			return fmt.Errorf("mounting '%s': %w", hostMount, err)
+		}
+	}
+
+	return nil
+}
diff --git a/pkg/install/mocks/install.go b/pkg/install/mocks/install.go
index 40aa7a39..00a5f491 100644
--- a/pkg/install/mocks/install.go
+++ b/pkg/install/mocks/install.go
@@ -31,6 +31,7 @@ import (
 	reflect "reflect"
 
 	v1beta1 "github.com/rancher/elemental-operator/api/v1beta1"
+	install "github.com/rancher/elemental-operator/pkg/install"
 	register "github.com/rancher/elemental-operator/pkg/register"
 	gomock "go.uber.org/mock/gomock"
 )
@@ -100,6 +101,21 @@ func (mr *MockInstallerMockRecorder) ResetElementalNetwork() *gomock.Call {
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResetElementalNetwork", reflect.TypeOf((*MockInstaller)(nil).ResetElementalNetwork))
 }
 
+// UpgradeElemental mocks base method.
+func (m *MockInstaller) UpgradeElemental(arg0 install.UpgradeContext) (bool, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "UpgradeElemental", arg0)
+	ret0, _ := ret[0].(bool)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// UpgradeElemental indicates an expected call of UpgradeElemental.
+func (mr *MockInstallerMockRecorder) UpgradeElemental(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpgradeElemental", reflect.TypeOf((*MockInstaller)(nil).UpgradeElemental), arg0)
+}
+
 // WriteLocalSystemAgentConfig mocks base method.
 func (m *MockInstaller) WriteLocalSystemAgentConfig(arg0 v1beta1.Elemental) error {
 	m.ctrl.T.Helper()