Fail if the backup command in the annotation exits != 0 #1027

Merged · 1 commit · Dec 9, 2024
1 change: 1 addition & 0 deletions cmd/operator/main.go
@@ -41,6 +41,7 @@ var (
	&cli.StringFlag{Destination: &cfg.Config.FileExtensionAnnotation, Name: "fileextensionannotation", EnvVars: []string{"BACKUP_FILEEXTENSIONANNOTATION"}, Value: "k8up.io/file-extension", Usage: "set the annotation name where the file extension is stored for backup commands"},

	&cli.IntFlag{Destination: &cfg.Config.GlobalKeepJobs, Hidden: true, Name: "globalkeepjobs", EnvVars: []string{"BACKUP_GLOBALKEEPJOBS"}, Value: -1, DefaultText: "unlimited", Usage: "set the number of old jobs to keep when cleaning up, applies to all job types"},
+	&cli.IntFlag{Destination: &cfg.Config.GlobalBackoffLimit, Name: "global-backoff-limit", EnvVars: []string{"BACKUP_GLOBAL_BACKOFF_LIMIT"}, Value: 6, Usage: "set the backoff limit for all backup jobs"},
	&cli.IntFlag{Destination: &cfg.Config.GlobalFailedJobsHistoryLimit, Name: "global-failed-jobs-history-limit", EnvVars: []string{"BACKUP_GLOBAL_FAILED_JOBS_HISTORY_LIMIT"}, Value: 3, Usage: "set the number of old, failed jobs to keep when cleaning up, applies to all job types"},
	&cli.IntFlag{Destination: &cfg.Config.GlobalSuccessfulJobsHistoryLimit, Name: "global-successful-jobs-history-limit", EnvVars: []string{"BACKUP_GLOBAL_SUCCESSFUL_JOBS_HISTORY_LIMIT"}, Value: 3, Usage: "set the number of old, successful jobs to keep when cleaning up, applies to all job types"},
	&cli.IntFlag{Destination: &cfg.Config.GlobalConcurrentArchiveJobsLimit, Name: "global-concurrent-archive-jobs-limit", EnvVars: []string{"BACKUP_GLOBAL_CONCURRENT_ARCHIVE_JOBS_LIMIT"}, DefaultText: "unlimited", Usage: "set the limit of concurrent archive jobs"},
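The new --global-backoff-limit flag defaults to 6, matching the Kubernetes Job default. A minimal usage sketch, assuming the operator is started via the k8up binary's operator subcommand; either form caps each backup job at 2 retries:

  k8up operator --global-backoff-limit=2
  # or, via the environment:
  BACKUP_GLOBAL_BACKOFF_LIMIT=2 k8up operator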
49 changes: 49 additions & 0 deletions e2e/definitions/annotated-subject/deployment-error.yaml
@@ -0,0 +1,49 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: annotated-subject-deployment
  namespace: k8up-e2e-subject
spec:
  replicas: 1
  selector:
    matchLabels:
      app: subject
  template:
    metadata:
      labels:
        app: subject
      annotations:
        k8up.io/backupcommand: 'invalid'
        k8up.io/backupcommand-container: subject-container
    spec:
      containers:
        - image: busybox
          imagePullPolicy: IfNotPresent
          name: dummy-container-blocking-first-position
          command:
            - "/bin/sh"
            - "-c"
            - "sleep infinity"
        - name: subject-container
          image: quay.io/prometheus/busybox:latest
          imagePullPolicy: IfNotPresent
          args:
            - sh
            - -c
            - |
              sleep infinity
          securityContext:
            runAsUser: $ID
          volumeMounts:
            - name: volume
              mountPath: /data
          env:
            - name: BACKUP_FILE_CONTENT
              value: ""
            - name: BACKUP_FILE_NAME
              value: ""
      volumes:
        - name: volume
          persistentVolumeClaim:
            claimName: subject-pvc
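The k8up.io/backupcommand annotation above is the crux of the test: the operator execs the given command in the container named by k8up.io/backupcommand-container, and invalid is not an executable in the image. A rough kubectl equivalent of that exec, for illustration only (the operator talks to the Kubernetes exec API directly, not through kubectl):

  kubectl -n k8up-e2e-subject exec deploy/annotated-subject-deployment \
    -c subject-container -- invalid

Before this PR a non-zero exit here did not fail the backup; with this change it does.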
4 changes: 0 additions & 4 deletions e2e/definitions/operator/deploy.yaml

This file was deleted.

2 changes: 2 additions & 0 deletions e2e/definitions/operator/values.yaml
@@ -9,3 +9,5 @@ k8up:
  envVars:
    - name: K8UP_DEBUG
      value: "true"
+    - name: BACKUP_GLOBAL_BACKOFF_LIMIT
+      value: "2"
23 changes: 23 additions & 0 deletions e2e/lib/k8up.bash
@@ -170,6 +170,15 @@ given_an_annotated_subject() {
  echo "✅ The annotated subject is ready"
}

+given_a_broken_annotated_subject() {
+  require_args 2 ${#}
+
+  kubectl apply -f definitions/pv/pvc.yaml
+  yq e '.spec.template.spec.containers[1].securityContext.runAsUser='$(id -u)' ' definitions/annotated-subject/deployment-error.yaml | kubectl apply -f -
+
+  echo "✅ The annotated subject is ready"
+}

given_an_annotated_subject_pod() {
  require_args 2 ${#}

@@ -457,6 +466,20 @@ wait_until() {
  kubectl -n "${ns}" wait --timeout 5m --for "condition=${condition}" "${object}"
}

+wait_for_until_jsonpath() {
+  require_args 3 ${#}
+
+  local object until jsonpath ns
+  object=${1}
+  until=${2}
+  jsonpath=${3}
+  ns=${NAMESPACE=${DETIK_CLIENT_NAMESPACE}}
+
+  echo "Waiting for '${object}' in namespace '${ns}' to reach '${jsonpath}' ..."
+  kubectl -n "${ns}" wait --timeout "${until}" --for "${jsonpath}" "${object}"
+}
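Usage mirrors the existing wait_until helper, but with an explicit timeout and a raw --for expression. For example, the new e2e test below uses it to wait up to two minutes for the backup's Completed condition to report reason Failed:

  wait_for_until_jsonpath backup/k8up-backup 2m 'jsonpath={.status.conditions[?(@.type=="Completed")].reason}=Failed'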


expect_file_in_container() {
  require_args 4 ${#}

31 changes: 31 additions & 0 deletions e2e/test-12-annotated-failure.bats
@@ -0,0 +1,31 @@
#!/usr/bin/env bats

load "lib/utils"
load "lib/detik"
load "lib/k8up"

# shellcheck disable=SC2034
DETIK_CLIENT_NAME="kubectl"
# shellcheck disable=SC2034
DETIK_CLIENT_NAMESPACE="k8up-e2e-subject"
# shellcheck disable=SC2034
DEBUG_DETIK="true"

@test "Annotated app, When creating a backup, Then expect Error" {
  expected_content="expected content: $(timestamp)"
  expected_filename="expected_filename.txt"

  given_a_running_operator
  given_a_clean_ns
  given_s3_storage
  given_a_broken_annotated_subject "${expected_filename}" "${expected_content}"

  kubectl apply -f definitions/secrets
  yq e '.spec.podSecurityContext.runAsUser='$(id -u)'' definitions/backup/backup.yaml | kubectl apply -f -

  try "at most 10 times every 5s to get backup named 'k8up-backup' and verify that '.status.started' is 'true'"
  verify_object_value_by_label job 'k8up.io/owned-by=backup_k8up-backup' '.status.active' 1 true

  wait_for_until_jsonpath backup/k8up-backup 2m 'jsonpath={.status.conditions[?(@.type=="Completed")].reason}=Failed'
}
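To run just this test, something like the following should work, assuming the usual e2e prerequisites (a kind cluster, bats, and the S3 fixtures behind given_s3_storage) are in place; the exact invocation is an assumption, not part of this PR:

  bats e2e/test-12-annotated-failure.bats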
2 changes: 1 addition & 1 deletion go.mod
@@ -1,6 +1,6 @@
module github.com/k8up-io/k8up/v2

-go 1.21
+go 1.23

require (
	github.com/firepear/qsplit/v2 v2.5.0
8 changes: 5 additions & 3 deletions operator/backupcontroller/executor.go
@@ -18,6 +18,7 @@ import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/utils/ptr"
	controllerruntime "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)
@@ -236,7 +237,7 @@ func (b *BackupExecutor) startBackup(ctx context.Context) error {
	}

	index := 0
-	for _, batchJob := range backupJobs {
+	for name, batchJob := range backupJobs {
		_, err = controllerruntime.CreateOrUpdate(ctx, b.Generic.Config.Client, batchJob.job, func() error {
			mutateErr := job.MutateBatchJob(ctx, batchJob.job, b.backup, b.Generic.Config, b.Client)
			if mutateErr != nil {
@@ -262,16 +263,17 @@
			}
			// each job sleeps for index seconds to avoid concurrent restic repository creation. Not the prettiest way but it works and a repository
			// is created only once usually.
-			if index > 0 {
+			if name == "prebackup" || index != 0 {
[Review comment from the PR author]
This fixes another small but unrelated issue: if there's a PVC backup AND prebackup commands, it can happen that both want to restic init at the same time, which will corrupt the repo.

				batchJob.job.Spec.Template.Spec.Containers[0].Env = append(batchJob.job.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{
					Name:  "SLEEP_DURATION",
-					Value: (time.Duration(index) * time.Second).String(),
+					Value: (3 * time.Second).String(),
				})
			}
			b.backup.Spec.AppendEnvFromToContainer(&batchJob.job.Spec.Template.Spec.Containers[0])
			batchJob.job.Spec.Template.Spec.Volumes = append(batchJob.job.Spec.Template.Spec.Volumes, batchJob.volumes...)
			batchJob.job.Spec.Template.Spec.Volumes = append(batchJob.job.Spec.Template.Spec.Volumes, utils.AttachTLSVolumes(b.backup.Spec.Volumes)...)
			batchJob.job.Spec.Template.Spec.Containers[0].VolumeMounts = append(b.newVolumeMounts(batchJob.volumes), b.attachTLSVolumeMounts()...)
+			batchJob.job.Spec.BackoffLimit = ptr.To(int32(cfg.Config.GlobalBackoffLimit))

			batchJob.job.Spec.Template.Spec.Containers[0].Args = b.setupArgs()
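One way to observe both changes on a live cluster is to inspect the generated jobs: the prebackup job should carry SLEEP_DURATION=3s, and every backup job should carry the configured backoff limit. A verification sketch, assuming the e2e namespace from the test above:

  kubectl -n k8up-e2e-subject get jobs \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.backoffLimit}{"\t"}{.spec.template.spec.containers[0].env[?(@.name=="SLEEP_DURATION")].value}{"\n"}{end}'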
1 change: 1 addition & 0 deletions operator/cfg/config.go
@@ -51,6 +51,7 @@ type Configuration struct {
	GlobalKeepJobs                   int
	GlobalFailedJobsHistoryLimit     int
	GlobalSuccessfulJobsHistoryLimit int
+	GlobalBackoffLimit               int
	GlobalRepoPassword               string
	GlobalRestoreS3AccessKey         string
	GlobalRestoreS3Bucket            string
3 changes: 3 additions & 0 deletions restic/kubernetes/pod_exec.go
@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"io"
"os"
"strings"

"github.com/firepear/qsplit/v2"
@@ -73,6 +74,8 @@ func PodExec(pod BackupPod, log logr.Logger) (*ExecData, error) {

		if err != nil {
			execLogger.Error(err, "streaming data failed", "namespace", pod.Namespace, "pod", pod.PodName)
+			// we just completely hard fail the whole backup pod, so the Job's backoffLimit applies
+			os.Exit(1)
			return
		}
	}()