Upstream contributions from Union.ai (#5769)

* Overlap create execution blob store reads/writes This change modifies launch paths stemming from `launchExecutionAndPrepareModel` to overlap blob store write and read calls, which dominate end-to-end latency (as seen in the traces below). Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Overlap FutureFileReader blob store writes/reads This change updates `FutureFileReader.Cache` and `FutureFileReader.RetrieveCache` to use overlapped write and reads, respectively, to reduce end-to-end latency. The read path is a common operation on each iteration of the propeller `Handle` loop for dynamic nodes. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Fix async notifications tests I didn't chase down why assumptions changed here and why these tests broke, but fixing them with more explicit checks. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Overlap fetching input and output data This change updates `GetExecutionData`, `GetNodeExecutionData`, and `GetTaskExecutionData` to use overlapped reads when fetching input and output data. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Add configuration for launchplan cache resync duration Currently, the launchplan cache resync duration uses the DownstreamEval duration configuration which is also used for the sync period on the k8s client. This means if we want to configure a more aggressive launchplan cache resync, we would also incur overhead in syncing all k8s resources (ex. Pods from `PodPlugin`). By adding a separate configuration value we can update these independently. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Enqueue owner on launchplan terminal state This PR enqueues the owner workflow for evaluation when the launchplan auto refresh cache detects a launchplan in a terminal state. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Add client-go metrics Register a few metric callbacks with the client-go metrics interface so that we can monitor request latencies and rate limiting of kubeclient. ``` ❯ curl http://localhost:10254/metrics | rg k8s_client k8s_client_rate_limiter_latency_bucket{verb="GET",le="0.005"} 84 k8s_client_rate_limiter_latency_bucket{verb="GET",le="0.01"} 87 k8s_client_rate_limiter_latency_bucket{verb="GET",le="0.025"} 89 k8s_client_rate_limiter_latency_bucket{verb="GET",le="0.05"} 99 k8s_client_rate_limiter_latency_bucket{verb="GET",le="0.1"} 114 k8s_client_rate_limiter_latency_bucket{verb="GET",le="0.25"} 117 k8s_client_rate_limiter_latency_bucket{verb="GET",le="0.5"} 117 k8s_client_rate_limiter_latency_bucket{verb="GET",le="1"} 117 k8s_client_rate_limiter_latency_bucket{verb="GET",le="2.5"} 117 k8s_client_rate_limiter_latency_bucket{verb="GET",le="5"} 117 k8s_client_rate_limiter_latency_bucket{verb="GET",le="10"} 117 k8s_client_rate_limiter_latency_bucket{verb="GET",le="+Inf"} 117 k8s_client_rate_limiter_latency_sum{verb="GET"} 1.9358371670000003 k8s_client_rate_limiter_latency_count{verb="GET"} 117 k8s_client_rate_limiter_latency_bucket{verb="POST",le="0.005"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="0.01"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="0.025"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="0.05"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="0.1"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="0.25"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="0.5"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="1"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="2.5"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="5"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="10"} 6 k8s_client_rate_limiter_latency_bucket{verb="POST",le="+Inf"} 6 k8s_client_rate_limiter_latency_sum{verb="POST"} 1.0542e-05 k8s_client_rate_limiter_latency_count{verb="POST"} 6 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="0.005"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="0.01"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="0.025"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="0.05"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="0.1"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="0.25"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="0.5"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="1"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="2.5"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="5"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="10"} 1 k8s_client_rate_limiter_latency_bucket{verb="PUT",le="+Inf"} 1 k8s_client_rate_limiter_latency_sum{verb="PUT"} 5e-07 k8s_client_rate_limiter_latency_count{verb="PUT"} 1 k8s_client_request_latency_bucket{verb="GET",le="0.005"} 84 k8s_client_request_latency_bucket{verb="GET",le="0.01"} 86 k8s_client_request_latency_bucket{verb="GET",le="0.025"} 89 k8s_client_request_latency_bucket{verb="GET",le="0.05"} 99 k8s_client_request_latency_bucket{verb="GET",le="0.1"} 112 k8s_client_request_latency_bucket{verb="GET",le="0.25"} 117 k8s_client_request_latency_bucket{verb="GET",le="0.5"} 117 k8s_client_request_latency_bucket{verb="GET",le="1"} 117 k8s_client_request_latency_bucket{verb="GET",le="2.5"} 117 k8s_client_request_latency_bucket{verb="GET",le="5"} 117 k8s_client_request_latency_bucket{verb="GET",le="10"} 117 k8s_client_request_latency_bucket{verb="GET",le="+Inf"} 117 k8s_client_request_latency_sum{verb="GET"} 2.1254330859999997 k8s_client_request_latency_count{verb="GET"} 117 k8s_client_request_latency_bucket{verb="POST",le="0.005"} 5 k8s_client_request_latency_bucket{verb="POST",le="0.01"} 5 k8s_client_request_latency_bucket{verb="POST",le="0.025"} 5 k8s_client_request_latency_bucket{verb="POST",le="0.05"} 6 k8s_client_request_latency_bucket{verb="POST",le="0.1"} 6 k8s_client_request_latency_bucket{verb="POST",le="0.25"} 6 k8s_client_request_latency_bucket{verb="POST",le="0.5"} 6 k8s_client_request_latency_bucket{verb="POST",le="1"} 6 k8s_client_request_latency_bucket{verb="POST",le="2.5"} 6 k8s_client_request_latency_bucket{verb="POST",le="5"} 6 k8s_client_request_latency_bucket{verb="POST",le="10"} 6 k8s_client_request_latency_bucket{verb="POST",le="+Inf"} 6 k8s_client_request_latency_sum{verb="POST"} 0.048558582 k8s_client_request_latency_count{verb="POST"} 6 k8s_client_request_latency_bucket{verb="PUT",le="0.005"} 1 k8s_client_request_latency_bucket{verb="PUT",le="0.01"} 1 k8s_client_request_latency_bucket{verb="PUT",le="0.025"} 1 k8s_client_request_latency_bucket{verb="PUT",le="0.05"} 1 k8s_client_request_latency_bucket{verb="PUT",le="0.1"} 1 k8s_client_request_latency_bucket{verb="PUT",le="0.25"} 1 k8s_client_request_latency_bucket{verb="PUT",le="0.5"} 1 k8s_client_request_latency_bucket{verb="PUT",le="1"} 1 k8s_client_request_latency_bucket{verb="PUT",le="2.5"} 1 k8s_client_request_latency_bucket{verb="PUT",le="5"} 1 k8s_client_request_latency_bucket{verb="PUT",le="10"} 1 k8s_client_request_latency_bucket{verb="PUT",le="+Inf"} 1 k8s_client_request_latency_sum{verb="PUT"} 0.002381375 k8s_client_request_latency_count{verb="PUT"} 1 k8s_client_request_total{code="200",method="GET"} 120 k8s_client_request_total{code="200",method="PUT"} 1 k8s_client_request_total{code="409",method="POST"} 6 ``` Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Histogram Bucket Options Add abstraction to be able to pass buckets custom defined to histogram vectors. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Add org to CreateUploadLocation Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Add config for grpc MaxMessageSizeBytes We need to make the grpc max recv message size in propeller's admin client configurable to match the server-side configuration we support in admin. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Move storage cache settings to correct location Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * added lock to memstore make threadsafe Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Add read replica host config and connection - Add a new field to the postgres db config struct, `readReplicaHost`. - Add a new endpoint in the `database` package to enable establishing a connection with a db without creating it if it doesn't exist Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Fix type assertion when an event is missed while connection to apiser… …ver was severed Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Log and monitor failures to validate access tokens Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Dask dashboard should have a separate log config Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * adjust Dask LogName to (Dask Runner Logs) Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Fix k3d local setup prefix I was trying to use `setup_local_dev.sh`, and it wasn't working out of the box. Looks like it expects `k3d-` prefix for the kubecontext Ran `setup_local_dev.sh` Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Override ArrayNode log links with map plugin This PR adds a configuration option to override ArrayNode log links with those defined in the map plugin. The map plugin contains it's own configuration for log links, which may differ from those defined on the PodPlugin. ArrayNode, executing subNodes as regular tasks (ie. using the PodPlugin) means that it uses the default PodPlugin log templates. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Add histogram stopwatch to stow storage This change * Adds a new `HistogramStopWatch` to promutils. This [allows for aggregating latencies](https://prometheus.io/docs/practices/histograms/#quantiles) across pods and computing quantiles at query time * Adds `HistogramStopWatch` latency metrics for stow so that we can reason about storage latencies in aggregate. Existing latency metrics remain. - [x] Added unittests Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Fix metrics scale division in timer * Fix metrics scale division in timer Signed-off-by: Iaroslav Ciupin <iaroslav@union.ai> Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * CreateDownloadLink: Head before signing Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Unexpectedly deleted pod metrics * Count when we see unexpectedly terminated pods Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Don't send inputURI for start-node * send empty `inputUri` for `start-node` in node execution event to flyteadmin and therefore, GetNodeExecutionData will not attempt to download non-existing inputUri as it was doing before this change. * add DB migration to clear `input_uri` in existing `node_executions` table for start nodes. Signed-off-by: Andrew Dye <andrewwdye@gmail.com> * Fix cluster pool assignment validation Signed-off-by: Andrew Dye <andrewwdye@gmail.com> --------- Signed-off-by: Andrew Dye <andrewwdye@gmail.com> Signed-off-by: Eduardo Apolinario <eapolinario@users.noreply.github.com> Co-authored-by: Dan Rammer <daniel@union.ai> Co-authored-by: Joe Eschen <126913098+squiishyy@users.noreply.github.com> Co-authored-by: Katrina Rogan <katroganGH@gmail.com> Co-authored-by: Michael Barrientos <michael@union.ai> Co-authored-by: Haytham Abuelfutuh <haytham@afutuh.com> Co-authored-by: Jan Fiedler <89976021+fiedlerNr9@users.noreply.github.com> Co-authored-by: Iaroslav Ciupin <iaroslav@union.ai> Co-authored-by: Eduardo Apolinario <eapolinario@users.noreply.github.com>
flyteorg · Oct 22, 2024 · e4d29f8 · e4d29f8
1 parent 71386f8
commit e4d29f8
Show file tree

Hide file tree

Showing 109 changed files with 3,134 additions and 683 deletions.
diff --git a/charts/flyte-core/README.md b/charts/flyte-core/README.md
@@ -230,7 +230,6 @@ helm install gateway bitnami/contour -n flyte
 | flytepropeller.additionalVolumeMounts | list | `[]` | Appends additional volume mounts to the main container's spec. May include template values. |
 | flytepropeller.additionalVolumes | list | `[]` | Appends additional volumes to the deployment spec. May include template values. |
 | flytepropeller.affinity | object | `{}` | affinity for Flytepropeller deployment |
-| flytepropeller.cacheSizeMbs | int | `0` |  |
 | flytepropeller.clusterName | string | `""` | Defines the cluster name used in events sent to Admin |
 | flytepropeller.configPath | string | `"/etc/flyte/config/*.yaml"` | Default regex string for searching configuration files |
 | flytepropeller.createCRDs | bool | `true` | Whether to install the flyteworkflows CRD with helm |
@@ -292,7 +291,7 @@ helm install gateway bitnami/contour -n flyte
 | sparkoperator.enabled | bool | `false` | - enable or disable Sparkoperator deployment installation |
 | sparkoperator.plugin_config | object | `{"plugins":{"spark":{"spark-config-default":[{"spark.hadoop.fs.s3a.aws.credentials.provider":"com.amazonaws.auth.DefaultAWSCredentialsProviderChain"},{"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version":"2"},{"spark.kubernetes.allocation.batch.size":"50"},{"spark.hadoop.fs.s3a.acl.default":"BucketOwnerFullControl"},{"spark.hadoop.fs.s3n.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3n.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3a.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3a.multipart.threshold":"536870912"},{"spark.blacklist.enabled":"true"},{"spark.blacklist.timeout":"5m"},{"spark.task.maxfailures":"8"}]}}}` | Spark plugin configuration |
 | sparkoperator.plugin_config.plugins.spark.spark-config-default | list | `[{"spark.hadoop.fs.s3a.aws.credentials.provider":"com.amazonaws.auth.DefaultAWSCredentialsProviderChain"},{"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version":"2"},{"spark.kubernetes.allocation.batch.size":"50"},{"spark.hadoop.fs.s3a.acl.default":"BucketOwnerFullControl"},{"spark.hadoop.fs.s3n.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3n.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem"},{"spark.hadoop.fs.AbstractFileSystem.s3a.impl":"org.apache.hadoop.fs.s3a.S3A"},{"spark.hadoop.fs.s3a.multipart.threshold":"536870912"},{"spark.blacklist.enabled":"true"},{"spark.blacklist.timeout":"5m"},{"spark.task.maxfailures":"8"}]` | Spark default configuration |
-| storage | object | `{"bucketName":"my-s3-bucket","custom":{},"enableMultiContainer":false,"gcs":null,"limits":{"maxDownloadMBs":10},"s3":{"accessKey":"","authType":"iam","region":"us-east-1","secretKey":""},"type":"sandbox"}` | ----------------------------------------------------  STORAGE SETTINGS  |
+| storage | object | `{"bucketName":"my-s3-bucket","cache":{"maxSizeMBs":0,"targetGCPercent":70},"custom":{},"enableMultiContainer":false,"gcs":null,"limits":{"maxDownloadMBs":10},"s3":{"accessKey":"","authType":"iam","region":"us-east-1","secretKey":""},"type":"sandbox"}` | ----------------------------------------------------  STORAGE SETTINGS  |
 | storage.bucketName | string | `"my-s3-bucket"` | bucketName defines the storage bucket flyte will use. Required for all types except for sandbox. |
 | storage.custom | object | `{}` | Settings for storage type custom. See https://github.com/graymeta/stow for supported storage providers/settings. |
 | storage.enableMultiContainer | bool | `false` | toggles multi-container storage config |

diff --git a/charts/flyte-core/templates/_helpers.tpl b/charts/flyte-core/templates/_helpers.tpl
@@ -237,4 +237,7 @@ storage:
   enable-multicontainer: {{ .Values.storage.enableMultiContainer }}
   limits:
     maxDownloadMBs: {{ .Values.storage.limits.maxDownloadMBs }}
+  cache:
+    max_size_mbs: {{ .Values.storage.cache.maxSizeMBs }}
+    target_gc_percent: {{ .Values.storage.cache.targetGCPercent }}
 {{- end }}
diff --git a/charts/flyte-core/templates/propeller/configmap.yaml b/charts/flyte-core/templates/propeller/configmap.yaml
@@ -47,11 +47,6 @@ data:
 {{- end }}
 {{- end }}
   storage.yaml: | {{ tpl (include "storage" .) $ | nindent 4 }}
-  cache.yaml: |
-    storage:
-      cache:
-        max_size_mbs: {{ .Values.flytepropeller.cacheSizeMbs }}
-        target_gc_percent: 70
 {{- with .Values.configmap.task_logs }}
   task_logs.yaml: | {{ tpl (toYaml .) $ | nindent 4 }}
 {{- end }}

diff --git a/charts/flyte-core/values-eks.yaml b/charts/flyte-core/values-eks.yaml
@@ -102,7 +102,6 @@ flytepropeller:
       cpu: 1
       ephemeral-storage: 1Gi
       memory: 2Gi
-  cacheSizeMbs: 1024
   # -- Sets priorityClassName for propeller pod(s).
   priorityClassName: "system-cluster-critical"
   affinity:
@@ -191,6 +190,8 @@ storage:
   bucketName: "{{ .Values.userSettings.bucketName }}"
   s3:
     region: "{{ .Values.userSettings.accountRegion }}"
+  cache:
+    maxSizeMBs: 1024
 
 db:
   datacatalog:

diff --git a/charts/flyte-core/values-keycloak-idp-flyteclients-without-browser.yaml b/charts/flyte-core/values-keycloak-idp-flyteclients-without-browser.yaml
@@ -215,7 +215,6 @@ flytepropeller:
       cpu: 10m
       ephemeral-storage: 50Mi
       memory: 100Mi
-  cacheSizeMbs: 0
   # -- Default regex string for searching configuration files
   configPath: /etc/flyte/config/*.yaml
 
@@ -399,6 +398,8 @@ storage:
   # -- default limits being applied to storage config
   limits:
     maxDownloadMBs: 10
+  cache:
+    maxSizeMBs: 0
 
 # Database configuration(These are the values for a pgdb instance with hostname of postgres-flyte and postgres/password creds)
 db:

diff --git a/charts/flyte-core/values.yaml b/charts/flyte-core/values.yaml
@@ -319,7 +319,6 @@ flytepropeller:
       cpu: 10m
       ephemeral-storage: 50Mi
       memory: 100Mi
-  cacheSizeMbs: 0
   # -- Error reporting
   terminationMessagePolicy: FallbackToLogsOnError
   # -- Default regex string for searching configuration files
@@ -610,6 +609,9 @@ storage:
   # -- default limits being applied to storage config
   limits:
     maxDownloadMBs: 10
+  cache:
+    maxSizeMBs: 0
+    targetGCPercent: 70
 
 # Database configuration
 db:

diff --git a/charts/flyte/README.md b/charts/flyte/README.md
diff --git a/charts/flyte/values.yaml b/charts/flyte/values.yaml
@@ -190,7 +190,6 @@ flyte:
         cpu: 10m
         ephemeral-storage: 50Mi
         memory: 50Mi
-    cacheSizeMbs: 0
     # -- Default regex string for searching configuration files
     configPath: /etc/flyte/config/*.yaml
 
@@ -333,6 +332,9 @@ flyte:
     # serviceAccountKey: ""
     # -- Settings for storage type custom. See https://github.com/graymeta/stow for supported storage providers/settings.
     custom: {}
+    cache:
+      maxSizeMBs: 0
+      targetGCPercent: 70
 
   # Database configuration
   db:

diff --git a/cmd/single/root.go b/cmd/single/root.go
@@ -8,10 +8,12 @@ import (
 
 	"github.com/flyteorg/flyte/flytestdlib/logger"
 
-	"github.com/flyteorg/flyte/flytestdlib/config"
-	"github.com/flyteorg/flyte/flytestdlib/config/viper"
 	"github.com/spf13/cobra"
 	"github.com/spf13/pflag"
+
+	"github.com/flyteorg/flyte/flytestdlib/config"
+	"github.com/flyteorg/flyte/flytestdlib/config/viper"
+	_ "github.com/flyteorg/flyte/flytestdlib/promutils"
 )
 
 var (

diff --git a/datacatalog/go.mod b/datacatalog/go.mod
@@ -38,7 +38,7 @@ require (
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
 	github.com/cespare/xxhash v1.1.0 // indirect
-	github.com/cespare/xxhash/v2 v2.2.0 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/coocood/freecache v1.1.1 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/emicklei/go-restful/v3 v3.9.0 // indirect
@@ -86,7 +86,6 @@ require (
 	github.com/mattn/go-colorable v0.1.12 // indirect
 	github.com/mattn/go-isatty v0.0.14 // indirect
 	github.com/mattn/go-sqlite3 v1.14.17 // indirect
-	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
@@ -96,10 +95,10 @@ require (
 	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	github.com/prometheus/client_golang v1.16.0 // indirect
-	github.com/prometheus/client_model v0.4.0 // indirect
-	github.com/prometheus/common v0.44.0 // indirect
-	github.com/prometheus/procfs v0.10.1 // indirect
+	github.com/prometheus/client_golang v1.19.1 // indirect
+	github.com/prometheus/client_model v0.6.1 // indirect
+	github.com/prometheus/common v0.53.0 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/sirupsen/logrus v1.9.3 // indirect
 	github.com/spf13/afero v1.8.2 // indirect
 	github.com/spf13/cast v1.4.1 // indirect
@@ -119,9 +118,9 @@ require (
 	go.opentelemetry.io/otel/trace v1.24.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.1.0 // indirect
 	golang.org/x/crypto v0.25.0 // indirect
-	golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
+	golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect
 	golang.org/x/net v0.27.0 // indirect
-	golang.org/x/oauth2 v0.16.0 // indirect
+	golang.org/x/oauth2 v0.18.0 // indirect
 	golang.org/x/sync v0.7.0 // indirect
 	golang.org/x/sys v0.22.0 // indirect
 	golang.org/x/term v0.22.0 // indirect
@@ -132,7 +131,7 @@ require (
 	google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20240123012728-ef4313101c80 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect
-	google.golang.org/protobuf v1.33.0 // indirect
+	google.golang.org/protobuf v1.34.1 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/ini.v1 v1.66.4 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect

diff --git a/datacatalog/go.sum b/datacatalog/go.sum
@@ -73,8 +73,8 @@ github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
 github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
-github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
-github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927 h1:SKI1/fuSdodxmNNyVBR8d7X/HuLnRpvvFO0AgyQk764=
 github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927/go.mod h1:h/aW8ynjgkuj+NQRlZcDbAbM1ORAbXjXX77sX7T289U=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
@@ -301,8 +301,6 @@ github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9
 github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
 github.com/mattn/go-sqlite3 v1.14.17 h1:mCRHCLDUBXgpKAqIKsaAaAsrAlbkeomtRFKXh2L6YIM=
 github.com/mattn/go-sqlite3 v1.14.17/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
-github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
-github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
 github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
 github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -330,15 +328,15 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
 github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8=
-github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc=
+github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
+github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY=
-github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
-github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
-github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
-github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg=
-github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE=
+github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
 github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
@@ -439,8 +437,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
 golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
 golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
-golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA=
-golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA=
+golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw=
+golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ=
 golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
 golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -509,8 +507,8 @@ golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ
 golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
 golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
 golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ=
-golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o=
+golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI=
+golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -749,8 +747,8 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
-google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
+google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=

diff --git a/deployment/eks/flyte_aws_scheduler_helm_generated.yaml b/deployment/eks/flyte_aws_scheduler_helm_generated.yaml
@@ -189,6 +189,9 @@ data:
       enable-multicontainer: false
       limits:
         maxDownloadMBs: 10
+      cache:
+        max_size_mbs: 1024
+        target_gc_percent: 70
   task_resource_defaults.yaml: | 
     task_resources:
       defaults:
@@ -398,6 +401,9 @@ data:
       enable-multicontainer: false
       limits:
         maxDownloadMBs: 10
+      cache:
+        max_size_mbs: 1024
+        target_gc_percent: 70
 ---
 # Source: flyte-core/templates/propeller/configmap.yaml
 apiVersion: v1
@@ -512,8 +518,6 @@ data:
       enable-multicontainer: false
       limits:
         maxDownloadMBs: 10
-  cache.yaml: |
-    storage:
       cache:
         max_size_mbs: 1024
         target_gc_percent: 70
@@ -857,7 +861,7 @@ spec:
   template:
     metadata:
       annotations:
-        configChecksum: "618a516ca42e8bbe5222a76f7865a0a444b6048002d7fcc06144c9188f3fd3d"
+        configChecksum: "c943b200cd0bed97fe456c0c713dd79cdc4e22133495cac89db3fc55e9b79c7"
       labels: 
         app.kubernetes.io/name: flyteadmin
         app.kubernetes.io/instance: flyte
@@ -1177,7 +1181,7 @@ spec:
   template:
     metadata:
       annotations:
-        configChecksum: "c2a15ce5dc2fa465986d6006f93450723da58166b3ad5ee35a91cb37d5c39da"
+        configChecksum: "ded28f3a68d22eb8e5af14a44cc0d14326f10060405268aac5a3665fb86c8bc"
       labels: 
         app.kubernetes.io/name: datacatalog
         app.kubernetes.io/instance: flyte
@@ -1279,7 +1283,7 @@ spec:
   template:
     metadata:
       annotations:
-        configChecksum: "6c8b21f7f9e96d92cfc0932e5a4289e969380662d96d3e6728a142bf01291c1"
+        configChecksum: "6572aa999f8e6842b4dba120e12e6ccb8cdfa506373de2a267b62a63146ccde"
         prometheus.io/path: "/metrics"
         prometheus.io/port: "10254"
       labels: 
@@ -1363,7 +1367,7 @@ spec:
         app.kubernetes.io/name: flyte-pod-webhook
         app.kubernetes.io/version: v1.13.2
       annotations:
-        configChecksum: "6c8b21f7f9e96d92cfc0932e5a4289e969380662d96d3e6728a142bf01291c1"
+        configChecksum: "6572aa999f8e6842b4dba120e12e6ccb8cdfa506373de2a267b62a63146ccde"
         prometheus.io/path: "/metrics"
         prometheus.io/port: "10254"
     spec: