Skip to content

Commit

Permalink
feat: introduce metal agent mode
Browse files Browse the repository at this point in the history
Introduce a new operating mode called the metal agent mode.

The mode is activated by the presence of a `/usr/local/etc/is-metal-agent` file under the root FS.

In this mode, Talos will:
- Only run the Initialize sequence; it will not be followed by the install/boot sequences.
- Mark STATE partitions as `missing`, so that Talos always remains in the "not installed" state.
- Block applying configuration via API while in maintenance mode.

This mode can be used, e.g., to collect hardware information from bare-metal servers.

Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
  • Loading branch information
utkuozdemir committed Oct 28, 2024
1 parent 0e15955 commit 2136358
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 14 deletions.
15 changes: 13 additions & 2 deletions internal/app/machined/pkg/controllers/block/volume_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/siderolabs/go-blockdevice/v2/encryption"
"go.uber.org/zap"

machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/pkg/partition"
"github.com/siderolabs/talos/pkg/machinery/cel"
"github.com/siderolabs/talos/pkg/machinery/cel/celenv"
Expand All @@ -28,8 +29,12 @@ import (
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
)

// noMatch is the boolean CEL expression `false`, parsed in the empty CEL environment.
// It is used as a volume locator match when a volume must be reported as missing.
var noMatch = cel.MustExpression(
	cel.ParseBooleanExpression("false", celenv.Empty()),
)

// VolumeConfigController provides volume configuration based on Talos defaults and machine configuration.
type VolumeConfigController struct{}
type VolumeConfigController struct {
V1Alpha1Mode machinedruntime.Mode
}

// Name implements controller.Controller interface.
func (ctrl *VolumeConfigController) Name() string {
Expand Down Expand Up @@ -291,8 +296,14 @@ func (ctrl *VolumeConfigController) manageStateNoConfig(encryptionMeta *runtime.
TargetPath: constants.StateMountPoint,
}

match := labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel)
if ctrl.V1Alpha1Mode.IsAgent() { // mark as missing
match = noMatch
}

// in agent mode `match` is the constant-false expression, so the STATE locator never matches
vc.TypedSpec().Locator = block.LocatorSpec{
Match: labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel),
Match: match,
}

if encryptionMeta != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"

machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/maintenance"
"github.com/siderolabs/talos/pkg/grpc/factory"
"github.com/siderolabs/talos/pkg/grpc/middleware/authz"
Expand All @@ -37,6 +38,7 @@ import (
// MaintenanceServiceController runs the maintenance service based on the configuration.
type MaintenanceServiceController struct {
// SiderolinkPeerCheckFunc is the SideroLink peer check callback.
// NOTE(review): presumably handed to the authz injector built in Run — confirm against the full file.
SiderolinkPeerCheckFunc authz.SideroLinkPeerCheckFunc
// V1Alpha1Mode is the runtime mode the node is operating in. Run forwards it to
// maintenance.New and skips printing the "upload configuration" usage hint when
// the mode reports IsAgent().
V1Alpha1Mode machinedruntime.Mode
}

// Name implements controller.Controller interface.
Expand Down Expand Up @@ -122,7 +124,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller.
defer shutdownServer(context.Background())

cfgCh := make(chan machineryconfig.Provider)
srv := maintenance.New(cfgCh)
srv := maintenance.New(cfgCh, ctrl.V1Alpha1Mode)

injector := &authz.Injector{
Mode: authz.ReadOnlyWithAdminOnSiderolink,
Expand Down Expand Up @@ -289,7 +291,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller.
lastCertificateFingerprint = fingerprint
}

if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" {
if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" && !ctrl.V1Alpha1Mode.IsAgent() {
firstIP := reachableAddresses[0]

logger.Sugar().Info("upload configuration using talosctl:")
Expand Down
18 changes: 14 additions & 4 deletions internal/app/machined/pkg/runtime/mode.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ const (
ModeContainer
// ModeMetal is the metal runtime mode.
ModeMetal
// ModeMetalAgent is the metal agent runtime mode.
ModeMetalAgent
)

const (
Expand All @@ -37,14 +39,15 @@ const (
)

const (
cloud = "cloud"
container = "container"
metal = "metal"
cloud = "cloud"
container = "container"
metal = "metal"
metalAgent = "metal-agent"
)

// String returns the string representation of a Mode.
func (m Mode) String() string {
return [...]string{cloud, container, metal}[m]
return [...]string{cloud, container, metal, metalAgent}[m]
}

// RequiresInstall implements config.RuntimeMode.
Expand All @@ -62,6 +65,11 @@ func (m Mode) Supports(feature ModeCapability) bool {
return (m.capabilities() & uint64(feature)) != 0
}

// IsAgent reports whether m is an agent mode. ModeMetalAgent is the only
// mode treated as an agent mode; every other mode yields false.
func (m Mode) IsAgent() bool {
	switch m { //nolint:exhaustive
	case ModeMetalAgent:
		return true
	default:
		return false
	}
}

// ParseMode returns a `Mode` that matches the specified string.
func ParseMode(s string) (mod Mode, err error) {
switch s {
Expand All @@ -71,6 +79,8 @@ func ParseMode(s string) (mod Mode, err error) {
mod = ModeContainer
case metal:
mod = ModeMetal
case metalAgent:
mod = ModeMetalAgent
default:
return mod, fmt.Errorf("unknown runtime mode: %q", s)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ const (
)

// Metal is a discoverer for non-cloud environments.
type Metal struct{}
type Metal struct {
IsAgent bool
}

// Name implements the platform.Platform interface.
func (m *Metal) Name() string {
Expand Down Expand Up @@ -118,6 +120,10 @@ func (m *Metal) Configuration(ctx context.Context, r state.State) ([]byte, error

// Mode implements the platform.Platform interface.
//
// It reports runtime.ModeMetalAgent when the IsAgent flag is set on the
// platform, and runtime.ModeMetal otherwise.
func (m *Metal) Mode() runtime.Mode {
	selected := runtime.ModeMetal

	if m.IsAgent {
		selected = runtime.ModeMetalAgent
	}

	return selected
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,11 @@ func newPlatform(platform string) (p runtime.Platform, err error) {
case "hcloud":
p = &hcloud.Hcloud{}
case constants.PlatformMetal:
p = &metal.Metal{}
_, metalAgentCheckErr := os.Stat(constants.MetalAgentModeFlagPath)

p = &metal.Metal{
IsAgent: metalAgentCheckErr == nil,
}
case "opennebula":
p = &opennebula.OpenNebula{}
case "openstack":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,12 @@ func (p PhaseList) AppendList(list PhaseList) PhaseList {
// Initialize is the initialize sequence. The primary goals of this sequence is
// to load the config and enforce kernel security requirements.
func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
mode := r.State().Platform().Mode()
phases := PhaseList{}

switch r.State().Platform().Mode() { //nolint:exhaustive
phases = phases.Append("logMode", LogMode)

switch mode { //nolint:exhaustive
case runtime.ModeContainer:
phases = phases.Append(
"systemRequirements",
Expand Down Expand Up @@ -118,6 +121,10 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
ReloadMeta,
).AppendWithDeferredCheck(
func() bool {
if mode == runtime.ModeMetalAgent {
return false
}

disabledStr := procfs.ProcCmdline().Get(constants.KernelParamDashboardDisabled).First()
disabled, _ := strconv.ParseBool(pointer.SafeDeref(disabledStr)) //nolint:errcheck

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ func WaitForUSB(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
}, "waitForUSB"
}

// LogMode returns the task named "logMode", which writes the platform's
// current runtime mode to the task logger and always succeeds.
func LogMode(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
	const taskName = "logMode"

	task := func(_ context.Context, logger *log.Logger, r runtime.Runtime) error {
		currentMode := r.State().Platform().Mode()

		logger.Printf("running in mode: %s", currentMode)

		return nil
	}

	return task, taskName
}

// EnforceKSPPRequirements represents the EnforceKSPPRequirements task.
func EnforceKSPPRequirements(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
&block.LVMActivationController{},
&block.SystemDiskController{},
&block.UserDiskConfigController{},
&block.VolumeConfigController{},
&block.VolumeConfigController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&block.VolumeManagerController{},
&cluster.AffiliateMergeController{},
cluster.NewConfigController(),
Expand Down Expand Up @@ -314,7 +316,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
Drainer: drainer,
},
&runtimecontrollers.MaintenanceConfigController{},
&runtimecontrollers.MaintenanceServiceController{},
&runtimecontrollers.MaintenanceServiceController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&runtimecontrollers.MachineStatusController{
V1Alpha1Events: ctrl.v1alpha1Runtime.Events(),
},
Expand Down
21 changes: 20 additions & 1 deletion internal/app/maintenance/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,20 @@ type Server struct {
controller runtime.Controller
cfgCh chan<- config.Provider
server *grpc.Server

mode runtime.Mode
}

// New initializes and returns a [Server].
func New(cfgCh chan<- config.Provider) *Server {
func New(cfgCh chan<- config.Provider, mode runtime.Mode) *Server {
if runtimeController == nil {
panic("runtime controller is not set")
}

return &Server{
controller: runtimeController,
cfgCh: cfgCh,
mode: mode,
}
}

Expand All @@ -73,6 +76,10 @@ func (s *Server) Register(obj *grpc.Server) {

// ApplyConfiguration implements [machine.MachineServiceServer].
func (s *Server) ApplyConfiguration(_ context.Context, in *machine.ApplyConfigurationRequest) (*machine.ApplyConfigurationResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}

//nolint:exhaustive
switch in.Mode {
case machine.ApplyConfigurationRequest_TRY:
Expand Down Expand Up @@ -117,6 +124,10 @@ Node is running in maintenance mode and does not have a config yet.`

// GenerateConfiguration implements the [machine.MachineServiceServer] interface.
func (s *Server) GenerateConfiguration(ctx context.Context, in *machine.GenerateConfigurationRequest) (*machine.GenerateConfigurationResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}

if in.MachineConfig == nil {
return nil, errors.New("invalid generate request")
}
Expand Down Expand Up @@ -162,6 +173,10 @@ func (s *Server) Version(ctx context.Context, _ *emptypb.Empty) (*machine.Versio

// Upgrade initiates an upgrade.
func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}

if err = s.assertAdminRole(ctx); err != nil {
return nil, err
}
Expand Down Expand Up @@ -211,6 +226,10 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
//
//nolint:gocyclo
func (s *Server) Reset(ctx context.Context, in *machine.ResetRequest) (*machine.ResetResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}

if err := s.assertAdminRole(ctx); err != nil {
return nil, err
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/machinery/cel/celenv/celenv.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ import (
"github.com/siderolabs/talos/pkg/machinery/api/resource/definitions/block"
)

// Empty returns a CEL environment created with no options.
//
// The environment is built on the first call and the same value is returned
// on every later call; if building it fails, the call panics with that error.
var Empty = sync.OnceValue(func() *cel.Env {
	emptyEnv, buildErr := cel.NewEnv()
	if buildErr == nil {
		return emptyEnv
	}

	panic(buildErr)
})

// DiskLocator is a disk locator CEL environment.
var DiskLocator = sync.OnceValue(func() *cel.Env {
var diskSpec block.DiskSpec
Expand Down
3 changes: 3 additions & 0 deletions pkg/machinery/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,9 @@ const (
//
// Note: 116 = 't' and 108 = 'l' in ASCII.
HostDNSAddress = "169.254.116.108"

// MetalAgentModeFlagPath is the path to the file indicating if the node is running in Metal Agent mode.
MetalAgentModeFlagPath = "/usr/local/etc/is-metal-agent"
)

// See https://linux.die.net/man/3/klogctl
Expand Down

0 comments on commit 2136358

Please sign in to comment.