From 2136358d65ddf6ad040ed62c835b335f99a59399 Mon Sep 17 00:00:00 2001 From: Utku Ozdemir Date: Wed, 9 Oct 2024 23:12:46 +0200 Subject: [PATCH] feat: introduce metal agent mode Introduce a new operating mode called the metal agent mode. The mode is activated by the presence of a `/usr/local/etc/is-metal-agent` file under the root FS. In this mode, Talos will: - Only run the Initialize sequence, won't follow it up with the install/boot sequences - Mark STATE partitions as `missing`, so Talos will always be in "not installed" state. - Block applying configuration via API while in maintenance mode. This mode can be used, e.g., to collect hardware information from bare-metal servers. Signed-off-by: Utku Ozdemir --- .../pkg/controllers/block/volume_config.go | 15 +++++++++++-- .../runtime/maintenance_service.go | 6 ++++-- internal/app/machined/pkg/runtime/mode.go | 18 ++++++++++++---- .../runtime/v1alpha1/platform/metal/metal.go | 8 ++++++- .../pkg/runtime/v1alpha1/platform/platform.go | 6 +++++- .../runtime/v1alpha1/v1alpha1_sequencer.go | 9 +++++++- .../v1alpha1/v1alpha1_sequencer_tasks.go | 9 ++++++++ .../runtime/v1alpha2/v1alpha2_controller.go | 8 +++++-- internal/app/maintenance/server.go | 21 ++++++++++++++++++- pkg/machinery/cel/celenv/celenv.go | 10 +++++++++ pkg/machinery/constants/constants.go | 3 +++ 11 files changed, 99 insertions(+), 14 deletions(-) diff --git a/internal/app/machined/pkg/controllers/block/volume_config.go b/internal/app/machined/pkg/controllers/block/volume_config.go index 0d61124a43..7ebfeffe19 100644 --- a/internal/app/machined/pkg/controllers/block/volume_config.go +++ b/internal/app/machined/pkg/controllers/block/volume_config.go @@ -16,6 +16,7 @@ import ( "github.com/siderolabs/go-blockdevice/v2/encryption" "go.uber.org/zap" + machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" "github.com/siderolabs/talos/internal/pkg/partition" "github.com/siderolabs/talos/pkg/machinery/cel" "github.com/siderolabs/talos/pkg/machinery/cel/celenv" @@ -28,8 +29,12 @@ import ( "github.com/siderolabs/talos/pkg/machinery/resources/runtime" ) +var noMatch = cel.MustExpression(cel.ParseBooleanExpression("false", celenv.Empty())) + // VolumeConfigController provides volume configuration based on Talos defaults and machine configuration. -type VolumeConfigController struct{} +type VolumeConfigController struct { + V1Alpha1Mode machinedruntime.Mode +} // Name implements controller.Controller interface. func (ctrl *VolumeConfigController) Name() string { @@ -291,8 +296,14 @@ func (ctrl *VolumeConfigController) manageStateNoConfig(encryptionMeta *runtime. TargetPath: constants.StateMountPoint, } + match := labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel) + if ctrl.V1Alpha1Mode.IsAgent() { // mark as missing + match = noMatch + } + + // check here - make match false vc.TypedSpec().Locator = block.LocatorSpec{ - Match: labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel), + Match: match, } if encryptionMeta != nil { diff --git a/internal/app/machined/pkg/controllers/runtime/maintenance_service.go b/internal/app/machined/pkg/controllers/runtime/maintenance_service.go index a2a7b81bee..8236ab9fa5 100644 --- a/internal/app/machined/pkg/controllers/runtime/maintenance_service.go +++ b/internal/app/machined/pkg/controllers/runtime/maintenance_service.go @@ -25,6 +25,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/credentials" + machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" "github.com/siderolabs/talos/internal/app/maintenance" "github.com/siderolabs/talos/pkg/grpc/factory" "github.com/siderolabs/talos/pkg/grpc/middleware/authz" @@ -37,6 +38,7 @@ import ( // MaintenanceServiceController runs the maintenance service based on the configuration. type MaintenanceServiceController struct { SiderolinkPeerCheckFunc authz.SideroLinkPeerCheckFunc + V1Alpha1Mode machinedruntime.Mode } // Name implements controller.Controller interface. @@ -122,7 +124,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller. defer shutdownServer(context.Background()) cfgCh := make(chan machineryconfig.Provider) - srv := maintenance.New(cfgCh) + srv := maintenance.New(cfgCh, ctrl.V1Alpha1Mode) injector := &authz.Injector{ Mode: authz.ReadOnlyWithAdminOnSiderolink, @@ -289,7 +291,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller. lastCertificateFingerprint = fingerprint } - if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" { + if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" && !ctrl.V1Alpha1Mode.IsAgent() { firstIP := reachableAddresses[0] logger.Sugar().Info("upload configuration using talosctl:") diff --git a/internal/app/machined/pkg/runtime/mode.go b/internal/app/machined/pkg/runtime/mode.go index ec6ca69e55..6c40b2bec7 100644 --- a/internal/app/machined/pkg/runtime/mode.go +++ b/internal/app/machined/pkg/runtime/mode.go @@ -21,6 +21,8 @@ const ( ModeContainer // ModeMetal is the metal runtime mode. ModeMetal + // ModeMetalAgent is the metal agent runtime mode. + ModeMetalAgent ) const ( @@ -37,14 +39,15 @@ const ( ) const ( - cloud = "cloud" - container = "container" - metal = "metal" + cloud = "cloud" + container = "container" + metal = "metal" + metalAgent = "metal-agent" ) // String returns the string representation of a Mode. func (m Mode) String() string { - return [...]string{cloud, container, metal}[m] + return [...]string{cloud, container, metal, metalAgent}[m] } // RequiresInstall implements config.RuntimeMode. @@ -62,6 +65,11 @@ func (m Mode) Supports(feature ModeCapability) bool { return (m.capabilities() & uint64(feature)) != 0 } +// IsAgent returns true if the mode is an agent mode (i.e. metal agent mode). +func (m Mode) IsAgent() bool { + return m == ModeMetalAgent +} + // ParseMode returns a `Mode` that matches the specified string. func ParseMode(s string) (mod Mode, err error) { switch s { @@ -71,6 +79,8 @@ func ParseMode(s string) (mod Mode, err error) { mod = ModeContainer case metal: mod = ModeMetal + case metalAgent: + mod = ModeMetalAgent default: return mod, fmt.Errorf("unknown runtime mode: %q", s) } diff --git a/internal/app/machined/pkg/runtime/v1alpha1/platform/metal/metal.go b/internal/app/machined/pkg/runtime/v1alpha1/platform/metal/metal.go index 0f9d2ff695..45f56a1015 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/platform/metal/metal.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/platform/metal/metal.go @@ -43,7 +43,9 @@ const ( ) // Metal is a discoverer for non-cloud environments. -type Metal struct{} +type Metal struct { + IsAgent bool +} // Name implements the platform.Platform interface. func (m *Metal) Name() string { @@ -118,6 +120,10 @@ func (m *Metal) Configuration(ctx context.Context, r state.State) ([]byte, error // Mode implements the platform.Platform interface. func (m *Metal) Mode() runtime.Mode { + if m.IsAgent { + return runtime.ModeMetalAgent + } + return runtime.ModeMetal } diff --git a/internal/app/machined/pkg/runtime/v1alpha1/platform/platform.go b/internal/app/machined/pkg/runtime/v1alpha1/platform/platform.go index 073542edfa..9edd6c7c38 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/platform/platform.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/platform/platform.go @@ -113,7 +113,11 @@ func newPlatform(platform string) (p runtime.Platform, err error) { case "hcloud": p = &hcloud.Hcloud{} case constants.PlatformMetal: - p = &metal.Metal{} + _, metalAgentCheckErr := os.Stat(constants.MetalAgentModeFlagPath) + + p = &metal.Metal{ + IsAgent: metalAgentCheckErr == nil, + } case "opennebula": p = &opennebula.OpenNebula{} case "openstack": diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go index 13b6ab5cf8..4733f6d380 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go @@ -65,9 +65,12 @@ func (p PhaseList) AppendList(list PhaseList) PhaseList { // Initialize is the initialize sequence. The primary goals of this sequence is // to load the config and enforce kernel security requirements. func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase { + mode := r.State().Platform().Mode() phases := PhaseList{} - switch r.State().Platform().Mode() { //nolint:exhaustive + phases = phases.Append("logMode", LogMode) + + switch mode { //nolint:exhaustive case runtime.ModeContainer: phases = phases.Append( "systemRequirements", @@ -118,6 +121,10 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase { ReloadMeta, ).AppendWithDeferredCheck( func() bool { + if mode == runtime.ModeMetalAgent { + return false + } + disabledStr := procfs.ProcCmdline().Get(constants.KernelParamDashboardDisabled).First() disabled, _ := strconv.ParseBool(pointer.SafeDeref(disabledStr)) //nolint:errcheck diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go index 6b33c26010..8957db77b1 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go @@ -120,6 +120,15 @@ func WaitForUSB(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { }, "waitForUSB" } +// LogMode represents the LogMode task. +func LogMode(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { + return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error { + logger.Printf("running in mode: %s", r.State().Platform().Mode()) + + return nil + }, "logMode" +} + // EnforceKSPPRequirements represents the EnforceKSPPRequirements task. func EnforceKSPPRequirements(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { diff --git a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go index 93ea26117a..18ee53caca 100644 --- a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go +++ b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go @@ -96,7 +96,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error &block.LVMActivationController{}, &block.SystemDiskController{}, &block.UserDiskConfigController{}, - &block.VolumeConfigController{}, + &block.VolumeConfigController{ + V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(), + }, &block.VolumeManagerController{}, &cluster.AffiliateMergeController{}, cluster.NewConfigController(), @@ -314,7 +316,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error Drainer: drainer, }, &runtimecontrollers.MaintenanceConfigController{}, - &runtimecontrollers.MaintenanceServiceController{}, + &runtimecontrollers.MaintenanceServiceController{ + V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(), + }, &runtimecontrollers.MachineStatusController{ V1Alpha1Events: ctrl.v1alpha1Runtime.Events(), }, diff --git a/internal/app/maintenance/server.go b/internal/app/maintenance/server.go index ad95d42d86..2c5c469537 100644 --- a/internal/app/maintenance/server.go +++ b/internal/app/maintenance/server.go @@ -44,10 +44,12 @@ type Server struct { controller runtime.Controller cfgCh chan<- config.Provider server *grpc.Server + + mode runtime.Mode } // New initializes and returns a [Server]. -func New(cfgCh chan<- config.Provider) *Server { +func New(cfgCh chan<- config.Provider, mode runtime.Mode) *Server { if runtimeController == nil { panic("runtime controller is not set") } @@ -55,6 +57,7 @@ func New(cfgCh chan<- config.Provider) *Server { return &Server{ controller: runtimeController, cfgCh: cfgCh, + mode: mode, } } @@ -73,6 +76,10 @@ func (s *Server) Register(obj *grpc.Server) { // ApplyConfiguration implements [machine.MachineServiceServer]. func (s *Server) ApplyConfiguration(_ context.Context, in *machine.ApplyConfigurationRequest) (*machine.ApplyConfigurationResponse, error) { + if s.mode.IsAgent() { + return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode") + } + //nolint:exhaustive switch in.Mode { case machine.ApplyConfigurationRequest_TRY: @@ -117,6 +124,10 @@ Node is running in maintenance mode and does not have a config yet.` // GenerateConfiguration implements the [machine.MachineServiceServer] interface. func (s *Server) GenerateConfiguration(ctx context.Context, in *machine.GenerateConfigurationRequest) (*machine.GenerateConfigurationResponse, error) { + if s.mode.IsAgent() { + return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode") + } + if in.MachineConfig == nil { return nil, errors.New("invalid generate request") } @@ -162,6 +173,10 @@ func (s *Server) Version(ctx context.Context, _ *emptypb.Empty) (*machine.Versio // Upgrade initiates an upgrade. func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) { + if s.mode.IsAgent() { + return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode") + } + if err = s.assertAdminRole(ctx); err != nil { return nil, err } @@ -211,6 +226,10 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply // //nolint:gocyclo func (s *Server) Reset(ctx context.Context, in *machine.ResetRequest) (*machine.ResetResponse, error) { + if s.mode.IsAgent() { + return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode") + } + if err := s.assertAdminRole(ctx); err != nil { return nil, err } diff --git a/pkg/machinery/cel/celenv/celenv.go b/pkg/machinery/cel/celenv/celenv.go index 3842d75556..b63797db49 100644 --- a/pkg/machinery/cel/celenv/celenv.go +++ b/pkg/machinery/cel/celenv/celenv.go @@ -16,6 +16,16 @@ import ( "github.com/siderolabs/talos/pkg/machinery/api/resource/definitions/block" ) +// Empty is an empty CEL environment. +var Empty = sync.OnceValue(func() *cel.Env { + env, err := cel.NewEnv() + if err != nil { + panic(err) + } + + return env +}) + // DiskLocator is a disk locator CEL environment. var DiskLocator = sync.OnceValue(func() *cel.Env { var diskSpec block.DiskSpec diff --git a/pkg/machinery/constants/constants.go b/pkg/machinery/constants/constants.go index 97dc58d4c0..e288eb68e2 100644 --- a/pkg/machinery/constants/constants.go +++ b/pkg/machinery/constants/constants.go @@ -1094,6 +1094,9 @@ const ( // // Note: 116 = 't' and 108 = 'l' in ASCII. HostDNSAddress = "169.254.116.108" + + // MetalAgentModeFlagPath is the path to the file indicating if the node is running in Metal Agent mode. + MetalAgentModeFlagPath = "/usr/local/etc/is-metal-agent" ) // See https://linux.die.net/man/3/klogctl