# Build stage: compile a statically linked binary (CGO disabled) so that it
# can run in an image that contains nothing but the binary itself.
FROM golang:1.22 AS builder
LABEL authors="porridge@redhat.com"
COPY . /image-prefetcher
# The trailing `find . -ls` lists the build tree in the build log.
RUN cd /image-prefetcher && CGO_ENABLED=0 go build -a -ldflags '-extldflags "-static"' . && find . -ls

# Final stage: an empty image holding only the binary, placed at the root.
FROM scratch
COPY --from=builder /image-prefetcher/image-prefetcher /
# Exec-form CMD without a shell: a bare command name is looked up through PATH,
# and "/" is not on the default PATH, so the binary is referenced by its
# absolute path. CMD (rather than ENTRYPOINT) is kept so that container `args`
# which already start with "/image-prefetcher" keep working unchanged.
CMD ["/image-prefetcher"]
+ This image pull secret should be usable for all images fetched by the given instance. + If provided, it must be of type `kubernetes.io/dockerconfigjson` and exist in the same namespace. + + Example: + + ``` + go run github.com/stackrox/image-prefetcher/deploy@main my-images v0.0.8 vanilla > manifest.yaml + ``` + +2. Prepare an image list. This should be a plain text file with one image name per line. + Lines starting with `#` and blank ones are ignored. + ``` + echo debian:latest >> image-list.txt + echo quay.io/strimzi/kafka:latest-kafka-3.7.0 >> image-list.txt + ``` + +3. Deploy: + ``` + kubectl create namespace prefetch-images + kubectl create -n prefetch-images configmap my-images --from-file="images.txt=image-list.txt" + kubectl apply -n prefetch-images -f manifest.yaml + ``` + +4. Wait for the pull to complete, with a timeout: + ``` + kubectl rollout -n prefetch-images status daemonset my-images --timeout 5m + ``` + +5. If something goes wrong, look at logs: + ``` + kubectl logs -n prefetch-images daemonset/my-images -c prefetch + ``` + +### Customization + +You can tweak certain parameters such as timeouts by editing `args` in the above manifest. +See the [fetch command](./cmd/fetch.go) for accepted flags. + +## Limitations + +This utility was designed for small, ephemeral test clusters, in order to improve reliability and speed of end-to-end tests. + +If deployed on larger clusters, it may have a "thundering herd" effect on the OCI registries it pulls from. +This is because all images are pulled from all nodes in parallel. 
diff --git a/cmd/fetch.go b/cmd/fetch.go new file mode 100644 index 0000000..4312451 --- /dev/null +++ b/cmd/fetch.go @@ -0,0 +1,94 @@ +package cmd + +import ( + "log/slog" + "os" + "strings" + "time" + + "github.com/stackrox/image-prefetcher/internal" + + "github.com/spf13/cobra" +) + +// fetchCmd represents the fetch command +var fetchCmd = &cobra.Command{ + Use: "fetch", + Short: "Fetch images using CRI.", + Long: `This subcommand is intended to run in an init container of pods of a DaemonSet. + +It talks to Container Runtime Interface API to pull images in parallel, with retries.`, + RunE: func(cmd *cobra.Command, args []string) error { + opts := &slog.HandlerOptions{AddSource: true} + if debug { + opts.Level = slog.LevelDebug + } + logger := slog.New(slog.NewTextHandler(os.Stderr, opts)) + timing := internal.TimingConfig{ + ImageListTimeout: imageListTimeout, + InitialPullAttemptTimeout: initialPullAttemptTimeout, + MaxPullAttemptTimeout: maxPullAttemptTimeout, + OverallTimeout: overallTimeout, + InitialPullAttemptDelay: initialPullAttemptDelay, + MaxPullAttemptDelay: maxPullAttemptDelay, + } + imageList, err := loadImageNamesFromFile(imageListFile) + if err != nil { + return err + } + imageList = append(imageList, args...) + return internal.Run(logger, criSocket, dockerConfigJSONPath, timing, imageList...) 
+ }, +} + +var ( + criSocket string + dockerConfigJSONPath string + imageListFile string + debug bool + imageListTimeout = time.Minute + initialPullAttemptTimeout = 30 * time.Second + maxPullAttemptTimeout = 5 * time.Minute + overallTimeout = 20 * time.Minute + initialPullAttemptDelay = time.Second + maxPullAttemptDelay = 10 * time.Minute +) + +func init() { + rootCmd.AddCommand(fetchCmd) + + fetchCmd.Flags().StringVar(&criSocket, "cri-socket", "/run/containerd/containerd.sock", "Path to CRI UNIX socket.") + fetchCmd.Flags().StringVar(&dockerConfigJSONPath, "docker-config", "", "Path to docker config json file.") + fetchCmd.Flags().StringVar(&imageListFile, "image-list-file", "", "Path to text file containing images to pull (one per line).") + fetchCmd.Flags().BoolVar(&debug, "debug", false, "Whether to enable debug logging.") + + fetchCmd.Flags().DurationVar(&imageListTimeout, "image-list-timeout", imageListTimeout, "Timeout for image list calls (for debugging).") + fetchCmd.Flags().DurationVar(&initialPullAttemptTimeout, "initial-pull-attempt-timeout", initialPullAttemptTimeout, "Timeout for initial image pull call. Each subsequent attempt doubles it until max.") + fetchCmd.Flags().DurationVar(&maxPullAttemptTimeout, "max-pull-attempt-timeout", maxPullAttemptTimeout, "Maximum timeout for image pull call.") + fetchCmd.Flags().DurationVar(&overallTimeout, "overall-timeout", overallTimeout, "Overall timeout for a single run.") + fetchCmd.Flags().DurationVar(&initialPullAttemptDelay, "initial-pull-attempt-delay", initialPullAttemptDelay, "Timeout for initial delay between pulls of the same image. 
// loadImageNamesFromFile returns the image names listed in the file at fileName.
//
// An empty fileName means that no list file was given; the result is then a nil
// slice and a nil error. An error from reading the file is returned as is.
func loadImageNamesFromFile(fileName string) ([]string, error) {
	if len(fileName) == 0 {
		return nil, nil
	}
	content, readErr := os.ReadFile(fileName)
	if readErr != nil {
		return nil, readErr
	}
	return parseImageNames(content), nil
}

// parseImageNames extracts image names from the content of an image list.
//
// The content is treated as newline-separated text. Surrounding whitespace is
// stripped from every line; lines that are then empty or start with "#" are
// skipped. The result is nil when no image name is found.
func parseImageNames(content []byte) []string {
	var names []string
	remaining := string(content)
	for more := true; more; {
		var line string
		// Peel off one line at a time; the last piece has no trailing newline.
		line, remaining, more = strings.Cut(remaining, "\n")
		entry := strings.TrimSpace(line)
		if entry != "" && !strings.HasPrefix(entry, "#") {
			names = append(names, entry)
		}
	}
	return names
}
+func Execute() { + if err := rootCmd.Execute(); err != nil { + log.Fatal(err) + } +} diff --git a/cmd/sleep.go b/cmd/sleep.go new file mode 100644 index 0000000..049d43b --- /dev/null +++ b/cmd/sleep.go @@ -0,0 +1,26 @@ +package cmd + +import ( + "github.com/spf13/cobra" + "os" + "os/signal" + "syscall" +) + +// sleepCmd represents the sleep command +var sleepCmd = &cobra.Command{ + Use: "sleep", + Short: "Sleep forever.", + Long: `This can be used as main container of a DaemonSet to avoid having to pull another image.`, + Run: func(cmd *cobra.Command, args []string) { + println("Sleeping...") + cancelChan := make(chan os.Signal, 1) + signal.Notify(cancelChan, syscall.SIGTERM, syscall.SIGINT) + s := <-cancelChan + println("Terminating due to", s) + }, +} + +func init() { + rootCmd.AddCommand(sleepCmd) +} diff --git a/deploy/deployment.yaml.gotpl b/deploy/deployment.yaml.gotpl new file mode 100644 index 0000000..220a506 --- /dev/null +++ b/deploy/deployment.yaml.gotpl @@ -0,0 +1,123 @@ +{{ if .NeedsPrivileged }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: privileged-scc-use +rules: +- apiGroups: + - security.openshift.io + resourceNames: + - privileged + resources: + - securitycontextconstraints + verbs: + - use +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: prefetcher-privileged +subjects: + - kind: ServiceAccount + name: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: privileged-scc-use +--- +{{ end }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ .Name }} + annotations: + ignore-check.kube-linter.io/privilege-escalation-container: "Needs access to CRI socket." + ignore-check.kube-linter.io/privileged-container: "Needs access to CRI socket." + ignore-check.kube-linter.io/run-as-non-root: "Needs access to CRI socket." 
+spec: + selector: + matchLabels: + app: {{ .Name }} + template: + metadata: + labels: + app: {{ .Name }} + {{ if .NeedsPrivileged }} + annotations: + openshift.io/required-scc: privileged + {{ end }} + spec: + tolerations: + # Broad toleration to match stackrox collector. + - operator: "Exists" + initContainers: + - name: prefetch + image: {{ .Image }}:{{ .Version }} + args: + - "/image-prefetcher" + - "fetch" + {{ if .Secret }} + - "--docker-config=/tmp/pull-secret/.dockerconfigjson" + {{ end }} + - "--image-list-file=/tmp/list/images.txt" + {{ if .IsCRIO }} + - "--cri-socket=/tmp/cri/crio.sock" + {{ else }} + - "--cri-socket=/tmp/cri/containerd.sock" + {{ end }} + resources: + requests: + cpu: "20m" + memory: "16Mi" + limits: + cpu: "1" + memory: "256Mi" + volumeMounts: + - name: cri-socket-dir + mountPath: "/tmp/cri" + readOnly: true + - name: image-list + mountPath: "/tmp/list" + readOnly: true + {{ if .Secret }} + - mountPath: /tmp/pull-secret + name: pull-secret + readOnly: true + {{ end }} + securityContext: + readOnlyRootFilesystem: true + {{ if .NeedsPrivileged }} + allowPrivilegeEscalation: true + privileged: true + {{ end }} + containers: + - name: sleep + image: {{ .Image }}:{{ .Version }} + args: + - "/image-prefetcher" + - "sleep" + resources: + requests: + cpu: "5m" + memory: "16Mi" + limits: + cpu: "100m" + memory: "64Mi" + securityContext: + readOnlyRootFilesystem: true + volumes: + - name: cri-socket-dir + hostPath: + {{ if .IsCRIO }} + path: "/var/run/crio" + {{ else }} + path: "/var/run/containerd" + {{ end }} + - name: image-list + configMap: + name: {{ .Name }} + {{ if .Secret }} + - name: pull-secret + secret: + secretName: {{ .Secret }} + {{ end }} diff --git a/deploy/go.mod b/deploy/go.mod new file mode 100644 index 0000000..bf6c5e6 --- /dev/null +++ b/deploy/go.mod @@ -0,0 +1,3 @@ +module github.com/stackrox/image-prefetcher/deploy + +go 1.21 diff --git a/deploy/main.go b/deploy/main.go new file mode 100644 index 0000000..235f7bb --- 
/dev/null +++ b/deploy/main.go @@ -0,0 +1,50 @@ +package main + +import ( + _ "embed" + "log" + "os" + "text/template" +) + +type settings struct { + Name string + Image string + Version string + Secret string + IsCRIO bool + NeedsPrivileged bool +} + +// TODO(porridge): change to a dedicated org once it's created. +const imageRepo = "quay.io/mowsiany/image-prefetcher" + +//go:embed deployment.yaml.gotpl +var daemonSetTemplate string + +func main() { + if len(os.Args) < 4 { + println("Usage:", os.Args[0], " vanilla|ocp [secret]") + os.Exit(1) + } + name := os.Args[1] + version := os.Args[2] + isOcp := os.Args[3] == "ocp" + secret := "" + if len(os.Args) > 4 { + secret = os.Args[4] + } + + s := settings{ + Name: name, + Image: imageRepo, + Version: version, + Secret: secret, + IsCRIO: isOcp, + NeedsPrivileged: isOcp, + } + tmpl := template.Must(template.New("deployment").Parse(daemonSetTemplate)) + if err := tmpl.Execute(os.Stdout, s); err != nil { + log.Fatal(err) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..cd0d33b --- /dev/null +++ b/go.mod @@ -0,0 +1,66 @@ +module github.com/stackrox/image-prefetcher + +go 1.21 + +toolchain go1.21.7 + +require ( + github.com/spf13/cobra v1.8.0 + google.golang.org/grpc v1.58.3 + k8s.io/cri-api v0.29.3 + k8s.io/kubernetes v1.29.3 +) + +// This set of replaces is needed to use k8s.io/kubernetes +// See https://github.com/kubernetes/kubernetes/issues/79384#issuecomment-505627280 +// for more background. 
+// TODO(porridge): upgrade to 1.30.x and document the process +replace ( + k8s.io/cli-runtime => k8s.io/cli-runtime v1.29.3 + k8s.io/cloud-provider => k8s.io/cloud-provider v1.29.3 + k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v1.29.3 + k8s.io/component-helpers => k8s.io/component-helpers v1.29.3 + k8s.io/controller-manager => k8s.io/controller-manager v1.29.3 + k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v1.29.3 + k8s.io/dynamic-resource-allocation => k8s.io/dynamic-resource-allocation v1.29.3 + k8s.io/endpointslice => k8s.io/endpointslice v1.29.3 + k8s.io/kube-aggregator => k8s.io/kube-aggregator v1.29.3 + k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v1.29.3 + k8s.io/kube-proxy => k8s.io/kube-proxy v1.29.3 + k8s.io/kube-scheduler => k8s.io/kube-scheduler v1.29.3 + k8s.io/kubectl => k8s.io/kubectl v1.29.3 + k8s.io/kubelet => k8s.io/kubelet v1.29.3 + k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v1.29.3 + k8s.io/metrics => k8s.io/metrics v1.29.3 + k8s.io/mount-utils => k8s.io/mount-utils v1.29.3 + k8s.io/pod-security-admission => k8s.io/pod-security-admission v1.29.3 + k8s.io/sample-apiserver => k8s.io/sample-apiserver v1.29.3 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/go-logr/logr v1.3.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/prometheus/client_golang v1.16.0 // indirect + github.com/prometheus/client_model v0.4.0 // indirect + github.com/prometheus/common v0.44.0 // indirect + github.com/prometheus/procfs v0.10.1 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/net v0.19.0 // indirect + golang.org/x/sys v0.15.0 // 
indirect + golang.org/x/text v0.14.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + k8s.io/apiextensions-apiserver v0.29.3 // indirect + k8s.io/apimachinery v0.29.3 // indirect + k8s.io/apiserver v0.29.3 // indirect + k8s.io/component-base v0.29.3 // indirect + k8s.io/klog/v2 v2.110.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..b7211ba --- /dev/null +++ b/go.sum @@ -0,0 +1,111 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod 
h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= +github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= +github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= +github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= +github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= +github.com/prometheus/procfs v0.10.1/go.mod 
h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.19.0 
h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= +golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= +google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/apiextensions-apiserver v0.29.3 h1:9HF+EtZaVpFjStakF4yVufnXGPRppWFEQ87qnO91YeI= +k8s.io/apiextensions-apiserver v0.29.3/go.mod h1:po0XiY5scnpJfFizNGo6puNU6Fq6D70UJY2Cb2KwAVc= +k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU= +k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU= +k8s.io/apiserver v0.29.3 h1:xR7ELlJ/BZSr2n4CnD3lfA4gzFivh0wwfNfz9L0WZcE= +k8s.io/apiserver v0.29.3/go.mod h1:hrvXlwfRulbMbBgmWRQlFru2b/JySDpmzvQwwk4GUOs= 
+k8s.io/component-base v0.29.3 h1:Oq9/nddUxlnrCuuR2K/jp6aflVvc0uDvxMzAWxnGzAo= +k8s.io/component-base v0.29.3/go.mod h1:Yuj33XXjuOk2BAaHsIGHhCKZQAgYKhqIxIjIr2UXYio= +k8s.io/cri-api v0.29.3 h1:ppKSui+hhTJW774Mou6x+/ealmzt2jmTM0vsEQVWrjI= +k8s.io/cri-api v0.29.3/go.mod h1:3X7EnhsNaQnCweGhQCJwKNHlH7wHEYuKQ19bRvXMoJY= +k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/kubernetes v1.29.3 h1:EuOAKN4zpiP+kBx/0e9yS5iBkPSyLml19juOqZxBtDw= +k8s.io/kubernetes v1.29.3/go.mod h1:CP+Z+S9haxyB7J+nV6ywYry4dqlphArPXjcc0CsBVXc= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= diff --git a/internal/main.go b/internal/main.go new file mode 100644 index 0000000..05f67bd --- /dev/null +++ b/internal/main.go @@ -0,0 +1,151 @@ +package internal + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "os" + "sync" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + criV1 "k8s.io/cri-api/pkg/apis/runtime/v1" + "k8s.io/kubernetes/pkg/credentialprovider" +) + +type TimingConfig struct { + ImageListTimeout time.Duration + InitialPullAttemptTimeout time.Duration + MaxPullAttemptTimeout time.Duration + OverallTimeout time.Duration + InitialPullAttemptDelay time.Duration + MaxPullAttemptDelay time.Duration +} + +func Run(logger *slog.Logger, criSocketPath string, dockerConfigJSONPath string, timing TimingConfig, imageNames ...string) error { + ctx, cancel := context.WithTimeout(context.Background(), timing.OverallTimeout) + defer cancel() + + clientConn, err := grpc.DialContext(ctx, "unix://"+criSocketPath, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return fmt.Errorf("failed to dial CRI socket %q: %w", criSocketPath, err) + } + client := 
criV1.NewImageServiceClient(clientConn) + + if err := listImagesForDebugging(ctx, logger, client, timing.ImageListTimeout, "before"); err != nil { + return fmt.Errorf("failed to list images for debugging before pulling: %w", err) + } + + kr := credentialprovider.BasicDockerKeyring{} + if err := loadPullSecret(logger, &kr, dockerConfigJSONPath); err != nil { + return fmt.Errorf("failed to load image pull secrets: %w", err) + } + + var wg sync.WaitGroup + for _, imageName := range imageNames { + auths := getAuthsForImage(ctx, logger, &kr, imageName) + for i, auth := range auths { + wg.Add(1) + request := &criV1.PullImageRequest{ + Image: &criV1.ImageSpec{ + Image: imageName, + }, + Auth: auth, + } + go pullImageWithRetries(ctx, logger.With("image", imageName, "authNum", i), &wg, client, request, timing) + } + } + wg.Wait() + logger.Info("pulling images finished") + if err := listImagesForDebugging(ctx, logger, client, timing.ImageListTimeout, "after"); err != nil { + return fmt.Errorf("failed to list images for debugging after pulling: %w", err) + } + return nil +} + +func listImagesForDebugging(ctx context.Context, logger *slog.Logger, client criV1.ImageServiceClient, timeout time.Duration, stage string) error { + if !logger.Enabled(ctx, slog.LevelDebug) { + return nil + } + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + logger.DebugContext(ctx, "starting to list images") + imagesResp, err := client.ListImages(ctx, &criV1.ListImagesRequest{}) + if err != nil { + return fmt.Errorf("failed to call ListImages: %w", err) + } + logger.DebugContext(ctx, "finished listing images") + for _, i := range imagesResp.Images { + logger.DebugContext(ctx, "image present in runtime", "image", i, "stage", stage) + } + return nil +} + +func loadPullSecret(logger *slog.Logger, kr *credentialprovider.BasicDockerKeyring, dockerConfigJSONPath string) error { + if dockerConfigJSONPath == "" { + logger.Info("no image pull secret path provided, will pull without 
credentials") + return nil + } + f, err := os.ReadFile(dockerConfigJSONPath) + if err != nil { + return fmt.Errorf("failed read %q: %w", dockerConfigJSONPath, err) + } + dockerConfigJSON := credentialprovider.DockerConfigJSON{} + if err := json.Unmarshal(f, &dockerConfigJSON); err != nil { + return fmt.Errorf("unmarshalling docker config failed: %w", err) + } + kr.Add(dockerConfigJSON.Auths) + return nil +} + +func getAuthsForImage(ctx context.Context, logger *slog.Logger, kr credentialprovider.DockerKeyring, imageName string) []*criV1.AuthConfig { + credsList, _ := kr.Lookup(imageName) + var auths []*criV1.AuthConfig + if len(credsList) == 0 { + logger.DebugContext(ctx, "no credentials present for image", "image", imageName) + // un-authenticated pull + auths = append(auths, nil) + } + for _, creds := range credsList { + auth := &criV1.AuthConfig{ + Username: creds.Username, + Password: creds.Password, + Auth: creds.Auth, + ServerAddress: creds.ServerAddress, + IdentityToken: creds.IdentityToken, + RegistryToken: creds.RegistryToken, + } + auths = append(auths, auth) + } + return auths +} + +func pullImageWithRetries(ctx context.Context, logger *slog.Logger, wg *sync.WaitGroup, client criV1.ImageServiceClient, request *criV1.PullImageRequest, timing TimingConfig) { + defer wg.Done() + attemptTimeout := timing.InitialPullAttemptTimeout + delay := timing.InitialPullAttemptDelay + for { + logger.Info("attempting image pull", "timeout", attemptTimeout) + attemptCtx, cancel := context.WithTimeout(ctx, attemptTimeout) + start := time.Now() + response, err := client.PullImage(attemptCtx, request) + elapsed := time.Since(start) + cancel() + if err == nil { + logger.InfoContext(ctx, "image pulled successfully", "response", response, "elapsed", elapsed) + return + } + logger.ErrorContext(ctx, "image failed to pull", "error", err, "timeout", attemptTimeout, "elapsed", elapsed) + if ctx.Err() != nil { + logger.ErrorContext(ctx, "not retrying any more", "error", ctx.Err()) + 
return + } + // Be exponentially more patient on each attempt, but prevent overflows. + attemptTimeout = min(attemptTimeout*2, timing.MaxPullAttemptDelay) + logger.InfoContext(ctx, "sleeping before retry", "timeout", delay) + time.Sleep(delay) + delay = delay * 2 + } +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..865d12a --- /dev/null +++ b/main.go @@ -0,0 +1,7 @@ +package main + +import "github.com/stackrox/image-prefetcher/cmd" + +func main() { + cmd.Execute() +}