From 0f6b73e9d0b8a84dd9950c7c33026b983a8ddc8d Mon Sep 17 00:00:00 2001 From: nixpig <143995476+nixpig@users.noreply.github.com> Date: Sat, 26 Oct 2024 05:02:59 +0100 Subject: [PATCH] feat: implement oom score adj --- README.md | 2 +- cmd/cmd.go | 3 ++ internal/container/container.go | 57 +++++++++++++++++------- internal/container/filesystem/devices.go | 15 ++++++- internal/container/filesystem/rootfs.go | 10 ++++- oci.sh | 1 + 6 files changed, 68 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 46fc334..9b19ea2 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,7 @@ My goal is for `brownie` to (eventually) pass all tests in the [opencontainers O - [x] prestart_fail - [x] process - [x] process_capabilities +- [x] process_oom_score_adj - [x] start - [x] state @@ -207,7 +208,6 @@ My goal is for `brownie` to (eventually) pass all tests in the [opencontainers O - [ ] poststop - [ ] poststop_fail - [ ] process_capabilities_fail -- [ ] process_oom_score_adj - [ ] process_rlimits - [ ] process_rlimits_fail - [ ] process_user diff --git a/cmd/cmd.go b/cmd/cmd.go index a969304..4975a79 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -193,12 +193,15 @@ func forkCmd(log *zerolog.Logger, db *sql.DB) *cobra.Command { ConsoleSocketFD: consoleSocketFD, } + log.Info().Msg("loading container") cntr, err := container.Load(opts.ID, log, db) if err != nil { return err } + log.Info().Msg("forking container") if err := cntr.Fork(opts, log, db); err != nil { + log.Error().Err(err).Msg("failed to fork container") cntr.State.Status = specs.StateStopped if err := cntr.Save(); err != nil { log.Error().Err(err).Msg("failed to write state file") diff --git a/internal/container/container.go b/internal/container/container.go index d47cb36..d182fe5 100644 --- a/internal/container/container.go +++ b/internal/container/container.go @@ -203,22 +203,24 @@ func (c *Container) Init(opts *InitOpts, log *zerolog.Logger) error { var uidMappings []syscall.SysProcIDMap var gidMappings []syscall.SysProcIDMap + var unshareFlags uintptr // TODO: review if this is needed - // if c.Spec.Process != nil { - // cloneFlags |= syscall.CLONE_NEWUSER - - // uidMappings = append(uidMappings, syscall.SysProcIDMap{ - // ContainerID: int(c.Spec.Process.User.UID), - // HostID: os.Geteuid(), - // Size: 1, - // }) - // - // gidMappings = append(gidMappings, syscall.SysProcIDMap{ - // ContainerID: int(c.Spec.Process.User.GID), - // HostID: os.Getegid(), - // Size: 1, - // }) - // } + if c.Spec.Process != nil { + // cloneFlags |= syscall.CLONE_NEWUSER + // unshareFlags |= syscall.CLONE_NEWUSER + // + // uidMappings = append(uidMappings, syscall.SysProcIDMap{ + // ContainerID: int(c.Spec.Process.User.UID), + // HostID: os.Geteuid(), + // Size: 1, + // }) + // + // gidMappings = append(gidMappings, syscall.SysProcIDMap{ + // ContainerID: int(c.Spec.Process.User.GID), + // HostID: os.Getegid(), + // Size: 1, + // }) + } if c.Spec.Linux.UIDMappings != nil { for _, uidMapping := range c.Spec.Linux.UIDMappings { @@ -243,7 +245,7 @@ func (c *Container) Init(opts *InitOpts, log *zerolog.Logger) error { c.forkCmd.SysProcAttr = &syscall.SysProcAttr{ AmbientCaps: ambientCapsFlags, Cloneflags: cloneFlags, - Unshareflags: syscall.CLONE_NEWNS, + Unshareflags: unshareFlags | syscall.CLONE_NEWNS, GidMappingsEnableSetgroups: false, UidMappings: uidMappings, GidMappings: gidMappings, @@ -309,6 +311,7 @@ func (c *Container) Init(opts *InitOpts, log *zerolog.Logger) error { func (c *Container) Fork(opts *ForkOpts, log *zerolog.Logger, db *sql.DB) error { var err error + log.Info().Msg("creating new init sender") c.initIPC.ch, c.initIPC.closer, err = ipc.NewSender(opts.InitSockAddr) if err != nil { log.Error().Err(err).Msg("failed creating ipc sender") @@ -317,16 +320,19 @@ func (c *Container) Fork(opts *ForkOpts, log *zerolog.Logger, db *sql.DB) error defer c.initIPC.closer() if opts.ConsoleSocketFD != 0 { + log.Info().Msg("creating new terminal pty") pty, err := terminal.NewPty() if err != nil { return err } defer pty.Close() + log.Info().Msg("connecting to terminal pty") if err := pty.Connect(); err != nil { return err } + log.Info().Msg("opening terminal pty socket") consoleSocketPty := terminal.OpenPtySocket( opts.ConsoleSocketFD, opts.ConsoleSocketPath, @@ -334,18 +340,21 @@ func (c *Container) Fork(opts *ForkOpts, log *zerolog.Logger, db *sql.DB) error defer consoleSocketPty.Close() // FIXME: how do we pass ptysocket struct between fork? + log.Info().Msg("send message over terminal pty socket") if err := consoleSocketPty.SendMsg(pty); err != nil { return err } } // set up the socket _before_ pivot root + log.Info().Msg("remove existing container socket") if err := os.RemoveAll( filepath.Join(c.State.Bundle, containerSockFilename), ); err != nil { return err } + log.Info().Msg("create new container socket receiver") listCh, listCloser, err := ipc.NewReceiver(filepath.Join(c.State.Bundle, containerSockFilename)) if err != nil { log.Error().Err(err).Msg("failed to create new ipc receiver") @@ -353,13 +362,27 @@ func (c *Container) Fork(opts *ForkOpts, log *zerolog.Logger, db *sql.DB) error } defer listCloser() - if err := filesystem.SetupRootfs(c.State.Bundle, c.Spec); err != nil { + log.Info().Msg("setup root filesystem") + if err := filesystem.SetupRootfs(c.State.Bundle, c.Spec, log); err != nil { log.Error().Err(err).Msg("failed to setup rootfs") return err } + if c.Spec.Process != nil && c.Spec.Process.OOMScoreAdj != nil { + if err := os.WriteFile( + "/proc/self/oom_score_adj", + []byte(strconv.Itoa(*c.Spec.Process.OOMScoreAdj)), + 0644, + ); err != nil { + log.Error().Err(err).Msg("failed to write oom_score_adj") + return err + } + } + + log.Info().Msg("sending 'ready' msg") c.initIPC.ch <- []byte("ready") + log.Info().Msg("waiting for 'start' msg") startErr := ipc.WaitForMsg(listCh, "start", func() error { if err := filesystem.PivotRoot(c.State.Bundle); err != nil { log.Error().Err(err).Msg("failed to pivot root") diff --git a/internal/container/filesystem/devices.go b/internal/container/filesystem/devices.go index 70e2945..6ed1d2b 100644 --- a/internal/container/filesystem/devices.go +++ b/internal/container/filesystem/devices.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/rs/zerolog" "golang.org/x/sys/unix" ) @@ -92,8 +93,10 @@ func mountDefaultDevices(rootfs string) error { return mountDevices(defaultDevices, rootfs) } -func mountSpecDevices(devices []specs.LinuxDevice, rootfs string) error { +func mountSpecDevices(devices []specs.LinuxDevice, rootfs string, log *zerolog.Logger) error { for _, dev := range devices { + log.Info().Any("dev", dev).Msg("setup device") + var absPath string if strings.Index(dev.Path, "/") == 0 { relPath := strings.TrimPrefix(dev.Path, "/") @@ -102,6 +105,11 @@ func mountSpecDevices(devices []specs.LinuxDevice, rootfs string) error { absPath = filepath.Join(rootfs, dev.Path) } + log.Info(). + Str("path", absPath). + Uint32("filemode", uint32(*dev.FileMode)). + Int("dev", int(unix.Mkdev(uint32(dev.Major), uint32(dev.Minor)))). + Msg("make node") if err := unix.Mknod( absPath, uint32(*dev.FileMode), @@ -110,6 +118,11 @@ func mountSpecDevices(devices []specs.LinuxDevice, rootfs string) error { return err } + log.Info(). + Str("path", absPath). + Int("uid", int(*dev.UID)). + Int("gid", int(*dev.GID)). + Msg("chown") if err := os.Chown( absPath, int(*dev.UID), diff --git a/internal/container/filesystem/rootfs.go b/internal/container/filesystem/rootfs.go index 3ac965b..bd3fe52 100644 --- a/internal/container/filesystem/rootfs.go +++ b/internal/container/filesystem/rootfs.go @@ -5,23 +5,27 @@ import ( "path/filepath" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/rs/zerolog" ) -func SetupRootfs(root string, spec *specs.Spec) error { +func SetupRootfs(root string, spec *specs.Spec, log *zerolog.Logger) error { rootfs := root if spec.Root != nil { rootfs = filepath.Join(root, spec.Root.Path) } + log.Info().Msg("mount rootfs") if err := mountRootfs(rootfs); err != nil { return fmt.Errorf("mount rootfs: %w", err) } + log.Info().Msg("mount proc") if err := mountProc(rootfs); err != nil { return fmt.Errorf("mount proc: %w", err) } + log.Info().Msg("mount spec mounts") if err := mountSpecMounts( spec.Mounts, rootfs, @@ -29,17 +33,21 @@ func SetupRootfs(root string, spec *specs.Spec) error { return fmt.Errorf("mount spec mounts: %w", err) } + log.Info().Msg("mount default devices") if err := mountDefaultDevices(rootfs); err != nil { return fmt.Errorf("mount default devices: %w", err) } + log.Info().Msg("mount spec devices") if err := mountSpecDevices( spec.Linux.Devices, rootfs, + log, ); err != nil { return fmt.Errorf("mount spec devices: %w", err) } + log.Info().Msg("create default symlinks") if err := createSymlinks( defaultSymlinks, rootfs, diff --git a/oci.sh b/oci.sh index 960e80b..48653ed 100755 --- a/oci.sh +++ b/oci.sh @@ -15,6 +15,7 @@ RUNTIME=${RUNTIME:-./brownie} ./validation/prestart_fail/prestart_fail.t 2>&1 | tee -a results.tap ./validation/process/process.t 2>&1 | tee -a results.tap ./validation/process_capabilities/process_capabilities.t 2>&1 | tee -a results.tap +./validation/process_oom_score_adj/process_oom_score_adj.t 2>&1 | tee -a results.tap ./validation/start/start.t 2>&1 | tee -a results.tap ./validation/state/state.t 2>&1 | tee -a results.tap (! grep -F "not ok" results.tap)