-
Notifications
You must be signed in to change notification settings - Fork 381
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: prevent false positives in already running k0s instance detection #5400
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,7 @@ import ( | |
"github.com/k0sproject/k0s/internal/pkg/dir" | ||
"github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1" | ||
"github.com/k0sproject/k0s/pkg/constant" | ||
"github.com/shirou/gopsutil/v4/process" | ||
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
|
||
|
@@ -97,9 +98,14 @@ func LoadRuntimeConfig(k0sVars *CfgVars) (*RuntimeConfigSpec, error) { | |
// an error is returned, which allows the controller startup to proceed to | ||
// initialize a new runtime config. | ||
if spec.Pid != 0 { | ||
if err := checkPid(spec.Pid); err != nil { | ||
isRunning, err := checkInstanceRunning(spec.Pid) | ||
if !isRunning || err != nil { | ||
defer func() { _ = spec.Cleanup() }() | ||
return nil, errors.Join(ErrK0sNotRunning, err) | ||
if !isRunning { | ||
return nil, errors.Join(ErrK0sNotRunning, err) | ||
} else if err != nil { | ||
return nil, fmt.Errorf("%w: failed to check if instance is running", err) | ||
} | ||
} | ||
} | ||
|
||
|
@@ -192,3 +198,56 @@ func (r *RuntimeConfigSpec) Cleanup() error { | |
} | ||
return nil | ||
} | ||
|
||
// returns: | ||
// - true, nil if the pid is the same k0s executable and running | ||
// - false, nil if the pid is not running or another executable | ||
// - false, error on any kind of internal error checking the status | ||
// | ||
// process does not exist -> error | ||
// process exists and is same executable -> error | ||
// process exists and is not same executable -> nil | ||
func checkInstanceRunning(pid int) (bool, error) { | ||
// create the process object | ||
proc, err := process.NewProcess(int32(pid)) | ||
if err != nil { | ||
return false, fmt.Errorf("failed to create process object: %w", err) | ||
} | ||
|
||
// first check if the pid is running | ||
isRunning, err := proc.IsRunning() | ||
if err != nil { | ||
return false, fmt.Errorf("failed to check if process is running: %w", err) | ||
} | ||
if !isRunning { | ||
return false, nil | ||
} | ||
|
||
// Get the executable path of the target pid | ||
// - no need to resolve symlinks here, it is already a resolved path | ||
exeTarget, err := proc.Exe() | ||
if err != nil { | ||
return false, fmt.Errorf("failed to get process executable path: %w", err) | ||
} | ||
|
||
// Get the executable path of the current process | ||
currentExe, err := os.Executable() | ||
if err != nil { | ||
return false, fmt.Errorf("failed to get current executable: %w", err) | ||
} | ||
|
||
// Resolve symlinks | ||
currentExe, err = filepath.EvalSymlinks(currentExe) | ||
if err != nil { | ||
return false, fmt.Errorf("failed to resolve current executable symlinks: %w", err) | ||
} | ||
|
||
// check that the executable is the same in order to issue an error, if it would be a different | ||
// than it can't be a running instance of k0s, which the check is all about | ||
if exeTarget != currentExe { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we take this approach, there is a chance that another running copy of the k0s binary won't be detected as already running. Say k0s was started by the init system and is running in the background. Then someone grabs a fresh binary and runs |
||
return false, nil | ||
} | ||
|
||
// return that k0s is running witht he same executable image | ||
return true, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,10 @@ import ( | |
"testing" | ||
|
||
"github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1" | ||
"github.com/shirou/gopsutil/v4/process" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
"sigs.k8s.io/yaml" | ||
) | ||
|
||
func TestLoadRuntimeConfig_Legacy(t *testing.T) { | ||
|
@@ -86,6 +89,12 @@ spec: | |
assert.ErrorIs(t, err, ErrK0sNotRunning) | ||
} | ||
|
||
// The idea behind this test is: | ||
// - test to create a new runtime config from scratch | ||
// - actually test if k0s is still running by checking against: | ||
// - the same executable with the same pid as the runtime config | ||
// - a different executable with the same pid as the runtime config | ||
// - a pid that is not running anymore | ||
Comment on lines
+94
to
+97
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice. It would be super helpful to have all of those in separate tests. |
||
func TestNewRuntimeConfig(t *testing.T) { | ||
// create a temporary directory for k0s files | ||
tempDir, err := os.MkdirTemp("", "k0s") | ||
|
@@ -110,6 +119,7 @@ func TestNewRuntimeConfig(t *testing.T) { | |
} | ||
|
||
// create a new runtime config and check if it's valid | ||
// this will set the pid property to the current process id of the test run | ||
spec, err := NewRuntimeConfig(k0sVars) | ||
assert.NoError(t, err) | ||
assert.NotNil(t, spec) | ||
|
@@ -121,7 +131,94 @@ func TestNewRuntimeConfig(t *testing.T) { | |
assert.Equal(t, "10.0.0.1", cfg.Spec.API.Address) | ||
|
||
// try to create a new runtime config when one is already active and check if it returns an error | ||
// this will fail with ErrK0sAlreadyRunning since the pid property is this currently | ||
// executing test process (with the unit test) | ||
_, err = NewRuntimeConfig(k0sVars) | ||
assert.Error(t, err) | ||
assert.ErrorIs(t, err, ErrK0sAlreadyRunning) | ||
|
||
// Start a process in the background with a shell that sleeps for a long time | ||
// (infinity is not available on default macOS zsh), this simulates | ||
// a different running process with the same pid as stored in the runtime config, but | ||
// from a different executable image | ||
cmd := getBackgroundCommand() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For these kinds of tests, you could use |
||
err = cmd.Start() | ||
require.NoError(t, err, "Failed to start process") | ||
|
||
// remember the newly started PID | ||
pid := cmd.Process.Pid | ||
cleanupPid := pid | ||
|
||
// create the process object | ||
proc, err := process.NewProcess(int32(pid)) | ||
require.NoError(t, err, "failed to create process object") | ||
|
||
// check that the pid is running | ||
isRunning, err := proc.IsRunning() | ||
require.NoError(t, err, "IsRunning works correctly for newly started background process") | ||
require.True(t, isRunning, "background process is running") | ||
|
||
// this is the cleanup function that cleans the process in case of a test failure | ||
cleanupFunc := func() { | ||
if cleanupPid != 0 { | ||
// Shut down the child process (killing it is fine here) | ||
err = proc.Kill() | ||
require.NoError(t, err, "Failed to shut down child process") | ||
|
||
// Wait for the command to exit (ingore the error, since we're just cleaning up) | ||
_ = cmd.Wait() | ||
|
||
// set the cleanupPid to 0 to indicate successful termination and not clean up twice | ||
cleanupPid = 0 | ||
} | ||
} | ||
defer cleanupFunc() | ||
|
||
// modify the pid in the runtime config | ||
err = updateRuntimeConfigPID(k0sVars.RuntimeConfigPath, pid) | ||
require.NoError(t, err) | ||
|
||
// create a new runtime config, but this time with a pid of a process that actually exists | ||
// but is from a different executable image | ||
_, err = NewRuntimeConfig(k0sVars) | ||
require.NoError(t, err) | ||
|
||
// now kill the other process in order to test for the same pid, with the difference | ||
// that we now know it's from a process that is definitely not running anymore | ||
cleanupFunc() | ||
|
||
// modify the pid in the runtime config (again to the same pid, which is not running anymore) | ||
// this needs to be done since the previous PID value was overwritten with the new | ||
// config from the last call to NewRuntimeConfig) | ||
err = updateRuntimeConfigPID(k0sVars.RuntimeConfigPath, pid) | ||
require.NoError(t, err) | ||
|
||
// create a new runtime config, this should also succeed since the pid is not running | ||
_, err = NewRuntimeConfig(k0sVars) | ||
assert.NoError(t, err) | ||
} | ||
|
||
func updateRuntimeConfigPID(path string, pid int) error { | ||
content, err := os.ReadFile(path) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
config := &RuntimeConfig{} | ||
err = yaml.Unmarshal(content, config) | ||
if err != nil { | ||
return err | ||
} | ||
config.Spec.Pid = pid | ||
|
||
content, err = yaml.Marshal(config) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
err = os.WriteFile(path, content, 0600) | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd rather hardcode something like
os.Stat(filepath.Join("/proc", pid, "exe"))
here rather than pulling in a rather large dependency. This wouldn't work on Windows, but I'd revisit this another time. Ifruntime.GOOS
is not"linux"
, you could simply skip this function call, or return successful unconditionally, or return an error that will unwrap toerrors.ErrUnsupported
and check for this on the call sites ...There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe
os.Args[0]
would be a platform-agnostic alternative?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually my first PR commit had just a linux impl using the procfs. @twz123 the reason for this using procfs or something else to find the executable path, is actually that you need to find the executable path of a foreign pid too, not just your own.
I also think it would be much better to just implement something for Linux, or maybe even drop the PR completely, if you plan to change the runtime config parts anyway soon. Here was the original minimal change to checkPid that will only work on Linux:
e19014e#diff-406aeff8e9fcab83e1f62a534a0b5f9292b2cad0bf7d874a526fc15a9f6e8fe8L27
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
... of course 🤦♂️
Not sure about the soon part. It's definitely something worth doing, but that issue is lying around for years already, and is not really on the roadmap. You could try to tackle it, If you're interested! OTOH, we could also try to get this PR merged in the meantime.