Skip to content

Commit

Permalink
Allow custom annotations & labels (#1123)
Browse files Browse the repository at this point in the history
Allow annotations and labels to be specified in
`KubernetesResourceRequirements` IF they have been allow-listed using a
server config. The reason we need to have an allow-list is that allowing
the user to arbitrarily specify annotations and labels can contain
foot-guns (ex: a user specifying a label of `sematic.ai/component: api`
might wind up having their pod be considered part of the Sematic k8s
service object). In the worst case, they can contain security holes (ex:
an annotation of `kubernetes.io/psp: eks.privileged`).

Testing
-------

Deployed a server with this code to staging. Then:

- re-ran an old pipeline execution from the UI to confirm it still
worked.
- Ran the testing pipeline with custom runner resources and confirmed
that the specified annotations and labels appeared (except for the ones
that were not allow-listed).
- Modified the testing pipeline to launch a worker pod with resources
specifying custom annotations and labels. Confirmed the approved ones
appeared on the resulting pod.

---------

Co-authored-by: Josh Bauer <josh@sematic.dev>
  • Loading branch information
augray and Josh Bauer authored Jun 13, 2024
1 parent ec20aba commit c6259e2
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 4 deletions.
14 changes: 14 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,20 @@ Information on the Kubernetes resources required.
administrator has enabled the `ALLOW_HOST_PATH_MOUNTING` Server setting. More details
can be found here: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath

- `annotations`: Dict[str, str]

The annotations that should be applied to the Kubernetes job (and therefore pod).
Note that ONLY annotations whose keys have been explicitly allowed by your
Sematic cluster administrator will actually be applied. Any annotations whose
keys have not been approved will be ignored.

- `labels`: Dict[str, str]

The labels that should be applied to the Kubernetes job (and therefore pod).
Note that ONLY labels whose keys have been explicitly allowed by your
Sematic cluster administrator will actually be applied. Any labels whose
keys have not been approved will be ignored.

### `KubernetesSecretMount`

Information about how to expose Kubernetes secrets when running a Sematic func.
Expand Down
2 changes: 2 additions & 0 deletions helm/sematic-server/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ data:
{{ end }}
KUBERNETES_NAMESPACE: {{ .Release.Namespace }}
SEMATIC_WORKER_KUBERNETES_SA: {{ .Values.worker.service_account.name | quote }}
SEMATIC_WORKER_ALLOWED_ANNOTATION_KEYS: {{ toJson .Values.worker.allowed_annotation_keys | quote }}
SEMATIC_WORKER_ALLOWED_LABEL_KEYS: {{ toJson .Values.worker.allowed_label_keys | quote }}
ALLOW_CUSTOM_SECURITY_CONTEXTS: {{ .Values.worker.can_customize_security_context | quote }}
ALLOW_HOST_PATH_MOUNTING: {{ .Values.worker.can_mount_host_paths | quote }}
WORKER_IMAGE_PULL_SECRETS: {{ toJson .Values.worker.image_pull_secrets | quote }}
Expand Down
2 changes: 2 additions & 0 deletions helm/sematic-server/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ worker:
can_customize_security_context: false
can_mount_host_paths: false
image_pull_secrets: null
allowed_annotation_keys: []
allowed_label_keys: []

service:
create: true
Expand Down
8 changes: 8 additions & 0 deletions sematic/config/server_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ class ServerSettingsVar(AbstractPluginSettingsVar):
# uses for jobs.
SEMATIC_WORKER_KUBERNETES_SA = "SEMATIC_WORKER_KUBERNETES_SA"

# Controls which Kubernetes annotations pipeline authors
# can apply to their jobs.
SEMATIC_WORKER_ALLOWED_ANNOTATION_KEYS = "SEMATIC_WORKER_ALLOWED_ANNOTATION_KEYS"

# Controls which Kubernetes labels pipeline authors
# can apply to their jobs.
SEMATIC_WORKER_ALLOWED_LABEL_KEYS = "SEMATIC_WORKER_ALLOWED_LABEL_KEYS"

# What, if any, imagePullSecrets should be used for runner and
# standalone jobs?
WORKER_IMAGE_PULL_SECRETS = "WORKER_IMAGE_PULL_SECRETS"
Expand Down
10 changes: 10 additions & 0 deletions sematic/examples/testing_pipeline/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,16 @@ def _get_runner(args: argparse.Namespace) -> StateMachineRunner:
extra_kwargs["resources"] = ResourceRequirements(
kubernetes=KubernetesResourceRequirements(
requests={"memory": "3Gi"},
annotations={
"allowed-annotation-1": "42",
"allowed-annotation-2": "foo",
"forbidden-annotation": "bad-wolf",
},
labels={
"allowed-label-1": "43",
"allowed-label-2": "yo",
"forbidden-label": "1337-hax",
},
)
)
return CloudRunner(
Expand Down
2 changes: 1 addition & 1 deletion sematic/examples/testing_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ def testing_pipeline(

if expand_shared_memory:
k8_resource_requirements = KubernetesResourceRequirements(
mount_expanded_shared_memory=True
mount_expanded_shared_memory=True,
)
resource_requirements = ResourceRequirements(
kubernetes=k8_resource_requirements
Expand Down
12 changes: 12 additions & 0 deletions sematic/resolvers/resource_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,14 @@ class KubernetesResourceRequirements:
Sematic cluster administrator has enabled the `ALLOW_HOST_PATH_MOUNTING` Server
setting. Defaults to an empty list. More details can be found here:
https://kubernetes.io/docs/concepts/storage/volumes/#hostpath
annotations: Dict[str, str]
Kubernetes annotations to apply to the created pod. Only annotation keys
which have been allowed by your Sematic administrator can be used here.
Others will be ignored.
labels: Dict[str, str]
Kubernetes labels to apply to the created pod. Only label keys
which have been allowed by your Sematic administrator can be used here.
Others will be ignored.
"""

node_selector: Dict[str, str] = field(default_factory=dict)
Expand All @@ -344,6 +352,8 @@ class KubernetesResourceRequirements:
mount_expanded_shared_memory: bool = field(default=False)
security_context: Optional[KubernetesSecurityContext] = field(default=None)
host_path_mounts: List[KubernetesHostPathMount] = field(default_factory=list)
annotations: Dict[str, str] = field(default_factory=dict)
labels: Dict[str, str] = field(default_factory=dict)

def clone(self) -> "KubernetesResourceRequirements":
"""Deep copy these requirements."""
Expand All @@ -354,6 +364,8 @@ def clone(self) -> "KubernetesResourceRequirements":
requests=dict(self.requests),
tolerations=[t for t in self.tolerations],
host_path_mounts=[m for m in self.host_path_mounts],
annotations=dict(self.annotations),
labels=dict(self.labels),
)


Expand Down
51 changes: 48 additions & 3 deletions sematic/scheduling/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,8 @@ def _schedule_kubernetes_job(
security_context = None
image_pull_secrets = _get_image_pull_secrets()

annotations = {"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"}
labels: Dict[str, str] = {}
if resource_requirements is not None:
node_selector = resource_requirements.kubernetes.node_selector
resource_requests = resource_requirements.kubernetes.requests
Expand Down Expand Up @@ -645,6 +647,11 @@ def _schedule_kubernetes_job(
volumes.append(volume)
volume_mounts.append(mount)

annotations.update(
_sanitize_annotations(resource_requirements.kubernetes.annotations)
)
labels.update(_sanitize_labels(resource_requirements.kubernetes.labels))

secret_env_vars.extend(
_environment_secrets(resource_requirements.kubernetes.secret_mounts)
)
Expand All @@ -662,6 +669,8 @@ def _schedule_kubernetes_job(
logger.debug("kubernetes environment secrets: %s", secret_env_vars)
logger.debug("kubernetes tolerations: %s", tolerations)
logger.debug("kubernetes security context: %s", security_context)
logger.debug("kubernetes annotations: %s", annotations)
logger.debug("kubernetes labels: %s", labels)

pod_name_env_var = kubernetes.client.V1EnvVar( # type: ignore
name=KUBERNETES_POD_NAME_ENV_VAR,
Expand All @@ -681,9 +690,8 @@ def _schedule_kubernetes_job(
spec=kubernetes.client.V1JobSpec( # type: ignore
template=kubernetes.client.V1PodTemplateSpec( # type: ignore
metadata=kubernetes.client.V1ObjectMeta( # type: ignore
annotations={
"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
},
annotations=annotations,
labels=labels,
),
spec=kubernetes.client.V1PodSpec( # type: ignore
node_selector=node_selector,
Expand Down Expand Up @@ -1060,3 +1068,40 @@ def _validate_rfc1123_label(label_value: str, label_name: str) -> None:
f"{label_name} must start with an alphanumeric character and only contain "
f"alphanumeric characters and dashes. Got: '{label_value}'"
)


def _sanitize_annotations(annotations: Dict[str, str]) -> Dict[str, str]:
allowed_keys = set(
get_json_server_setting(
ServerSettingsVar.SEMATIC_WORKER_ALLOWED_ANNOTATION_KEYS, []
)
)
results: Dict[str, str] = {}
for key, value in annotations.items():
if key not in allowed_keys:
logger.error(
"User requested illegal annotation (will be ignored): '%s': '%s'",
key,
value,
)
continue
results[key] = value
return results


def _sanitize_labels(labels: Dict[str, str]) -> Dict[str, str]:
allowed_keys = set(
get_json_server_setting(ServerSettingsVar.SEMATIC_WORKER_ALLOWED_LABEL_KEYS, [])
)
allowed_keys.difference_update({"job-name"}) # reserved for Sematic
results: Dict[str, str] = {}
for key, value in labels.items():
if key not in allowed_keys:
logger.error(
"User requested illegal label (will be ignored): '%s': '%s'",
key,
value,
)
continue
results[key] = value
return results
16 changes: 16 additions & 0 deletions sematic/scheduling/tests/test_kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ def test_schedule_kubernetes_job(k8s_batch_client, mock_kube_config):
type="Directory",
),
],
annotations={
"allowed-annotation-1": "42",
"allowed-annotation-2": "43",
"forbidden-annotation": "666",
},
labels={
"allowed-label-1": "-42",
"allowed-label-2": "-43",
"forbidden-label": "-666",
},
)
)

Expand All @@ -121,6 +131,12 @@ def test_schedule_kubernetes_job(k8s_batch_client, mock_kube_config):
"ALLOW_CUSTOM_SECURITY_CONTEXTS": "true",
"ALLOW_HOST_PATH_MOUNTING": "true",
"WORKER_IMAGE_PULL_SECRETS": json.dumps([{"name": "foo-secret"}]),
"SEMATIC_WORKER_ALLOWED_ANNOTATION_KEYS": json.dumps(
["allowed-annotation-1", "allowed-annotation-2"]
),
"SEMATIC_WORKER_ALLOWED_LABEL_KEYS": json.dumps(
["allowed-label-1", "allowed-label-2"]
),
}
):
_schedule_kubernetes_job(
Expand Down

0 comments on commit c6259e2

Please sign in to comment.