diff --git a/.github/workflows/pipeline_swfs_test.yaml b/.github/workflows/pipeline_swfs_test.yaml
new file mode 100644
index 0000000000..2489ee3201
--- /dev/null
+++ b/.github/workflows/pipeline_swfs_test.yaml
@@ -0,0 +1,112 @@
+name: Deploy and test Kubeflow Pipelines manifests with seaweedfs and m2m auth in KinD
+on:
+  pull_request:
+    paths:
+    - tests/gh-actions/install_KinD_create_KinD_cluster_install_kustomize.sh
+    - .github/workflows/pipeline_swfs_test.yaml
+    - apps/pipeline/upstream/**
+    - tests/gh-actions/install_istio.sh
+    - tests/gh-actions/install_cert_manager.sh
+    - tests/gh-actions/install_oauth2-proxy.sh
+    # Scripts run by the steps below; changes to them should re-run this test.
+    - tests/gh-actions/install_kubectl.sh
+    - tests/gh-actions/install_pipelines.sh
+    - tests/gh-actions/install_multi_tenancy.sh
+    - common/cert-manager/**
+    - common/oauth2-proxy/**
+    - common/istio*/**
+    - contrib/seaweedfs/**
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+
+    - name: Install KinD, Create KinD cluster and Install kustomize
+      run: ./tests/gh-actions/install_KinD_create_KinD_cluster_install_kustomize.sh
+
+    - name: Install kubectl
+      run: ./tests/gh-actions/install_kubectl.sh
+
+    - name: Install Istio
+      run: ./tests/gh-actions/install_istio.sh
+
+    - name: Install oauth2-proxy
+      run: ./tests/gh-actions/install_oauth2-proxy.sh
+
+    - name: Install cert-manager
+      run: ./tests/gh-actions/install_cert_manager.sh
+
+    - name: Create kubeflow namespace
+      run: kustomize build common/kubeflow-namespace/base | kubectl apply -f -
+
+    - name: Install KF Pipelines
+      run: ./tests/gh-actions/install_pipelines.sh
+
+    - name: Install KF Multi Tenancy
+      run: ./tests/gh-actions/install_multi_tenancy.sh
+
+    - name: Install kubeflow-istio-resources
+      run: kustomize build common/istio-1-22/kubeflow-istio-resources/base | kubectl apply -f -
+
+    - name: Create KF Profile
+      run: kustomize build common/user-namespace/base | kubectl apply -f -
+
+    - name: Install seaweedfs
+      run: |
+        kustomize build contrib/seaweedfs/istio | kubectl apply -f -
+        kubectl -n kubeflow wait --for=condition=available --timeout=600s deploy/seaweedfs
+        # Configure an S3 identity (user and access key "minio") with Read, Write and List actions via weed shell.
+        kubectl -n kubeflow exec deploy/seaweedfs -c seaweedfs -- sh -c "echo \"s3.configure -user minio -access_key minio -secret_key minio123 -actions Read,Write,List -apply\" | /usr/bin/weed shell"
+
+    - name: port forward
+      run: |
+        ingress_gateway_service=$(kubectl get svc --namespace istio-system --selector="app=istio-ingressgateway" --output jsonpath='{.items[0].metadata.name}')
+        nohup kubectl port-forward --namespace istio-system svc/${ingress_gateway_service} 8080:80 &
+        # Give up after two minutes instead of looping forever if the port-forward never comes up.
+        timeout 120 bash -c 'while ! curl localhost:8080; do echo waiting for port-forwarding; sleep 1; done'
+        echo port-forwarding ready
+
+    - name: List and deploy test pipeline with authorized ServiceAccount Token
+      run: |
+        pip3 install kfp==2.4.0
+        KF_PROFILE=kubeflow-user-example-com
+        TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)"
+
+        python -c '
+        from time import monotonic, sleep
+        import kfp
+        import sys
+
+        token = sys.argv[1]
+        namespace = sys.argv[2]
+        client = kfp.Client(host="http://localhost:8080/pipeline", existing_token=token)
+
+        pipeline = client.list_pipelines().pipelines[0]
+        pipeline_name = pipeline.display_name
+        pipeline_id = pipeline.pipeline_id
+        pipeline_version_id = client.list_pipeline_versions(pipeline_id).pipeline_versions[0].pipeline_version_id
+        experiment_id = client.create_experiment("seaweedfs-test", namespace=namespace).experiment_id
+
+        print(f"Starting pipeline {pipeline_name}.")
+        run_id = client.run_pipeline(experiment_id=experiment_id, job_name="m2m-test", pipeline_id=pipeline_id, version_id=pipeline_version_id).run_id
+
+        # Bound the wait (30 minutes) so a stuck run fails the job instead of hanging it.
+        deadline = monotonic() + 1800
+        while True:
+          status = client.get_run(run_id=run_id).state
+          if status in ["PENDING", "RUNNING"]:
+            if monotonic() > deadline:
+              print(f"Timed out waiting for run_id: {run_id}, last status: {status}.")
+              raise SystemExit(1)
+            print(f"Waiting for run_id: {run_id}, status: {status}.")
+            sleep(10)
+          else:
+            print(f"Run with id {run_id} finished with status: {status}.")
+            if status != "SUCCEEDED":
+              print("Pipeline failed")
+              raise SystemExit(1)
+            break
+        ' "${TOKEN}" "${KF_PROFILE}"
diff --git a/contrib/seaweedfs/OWNERS b/contrib/seaweedfs/OWNERS
new file mode 100644
index 0000000000..82967fbf54
--- /dev/null
+++ b/contrib/seaweedfs/OWNERS
@@ -0,0 +1,6 @@
+approvers:
+# - pschoen-itsc
+  - juliusvonkohout
+reviewers:
+# - pschoen-itsc
+  - juliusvonkohout
diff --git a/contrib/seaweedfs/README.md b/contrib/seaweedfs/README.md
new file mode 100644
index 0000000000..619b677cb0
--- /dev/null
+++ b/contrib/seaweedfs/README.md
@@ -0,0 +1,51 @@
+# SeaweedFS
+
+- [Official documentation](https://github.com/seaweedfs/seaweedfs/wiki)
+- [Official repository](https://github.com/seaweedfs/seaweedfs)
+
+SeaweedFS is a simple and highly scalable distributed file system. It has an S3 interface which makes it usable as an object store for kubeflow.
+
+## Prerequisites
+
+- Kubernetes (any recent version should work)
+- You should have `kubectl` available and configured to talk to the desired cluster.
+- `kustomize`
+- If you installed kubeflow with minio, use the `istio` dir instead of `base` for the kustomize commands.
+
+## Compile manifests
+
+```bash
+kubectl kustomize ./base/
+```
+
+## Install SeaweedFS
+
+**WARNING**
+This replaces the service `minio-service` and will redirect the traffic to seaweedfs.
+
+```bash
+# Optional, but recommended to backup existing minio-service
+kubectl get -n kubeflow svc minio-service -o=jsonpath='{.metadata.annotations.kubectl\.kubernetes\.io/last-applied-configuration}' > svc-minio-service-backup.json
+
+kubectl kustomize ./base/ | kubectl apply -f -
+```
+
+## Verify deployment
+
+Run
+```bash
+./test.sh
+```
+With the ready check on the container it already verifies that the S3 starts correctly.
+You can then use it with the endpoint at http://localhost:8333.
+To create access keys open a shell on the pod and use `weed shell` to configure your instance.
+Create a user with the command `s3.configure -user USERNAME -access_key ACCESS_KEY -secret_key SECRET_KEY -actions Read:BUCKET/PREFIX,Write:BUCKET/PREFIX -apply` (replace the upper-case placeholders with your own values).
+Documentation for this can also be found [here](https://github.com/seaweedfs/seaweedfs/wiki/Amazon-S3-API).
+
+## Uninstall SeaweedFS
+
+```bash
+kubectl kustomize ./base/ | kubectl delete -f -
+# Restore minio-service from backup
+kubectl apply -f svc-minio-service-backup.json
+```
diff --git a/contrib/seaweedfs/UPGRADE.md b/contrib/seaweedfs/UPGRADE.md
new file mode 100644
index 0000000000..0193a91844
--- /dev/null
+++ b/contrib/seaweedfs/UPGRADE.md
@@ -0,0 +1,3 @@
+# Upgrade SeaweedFS
+
+Change the image tag in the Deployment to the desired version. You can find the available images [here](https://hub.docker.com/r/chrislusf/seaweedfs).
diff --git a/contrib/seaweedfs/base/kustomization.yaml b/contrib/seaweedfs/base/kustomization.yaml
new file mode 100644
index 0000000000..166c238dae
--- /dev/null
+++ b/contrib/seaweedfs/base/kustomization.yaml
@@ -0,0 +1,9 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: kubeflow
+
+resources:
+- seaweedfs-deployment.yaml
+- seaweedfs-pvc.yaml
+- seaweedfs-service.yaml
+- seaweedfs-networkpolicy.yaml
diff --git a/contrib/seaweedfs/base/seaweedfs-networkpolicy.yaml b/contrib/seaweedfs/base/seaweedfs-networkpolicy.yaml
new file mode 100644
index 0000000000..6d2cffbdc9
--- /dev/null
+++ b/contrib/seaweedfs/base/seaweedfs-networkpolicy.yaml
@@ -0,0 +1,28 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: seaweedfs
+spec:
+  ingress:
+  - from:
+    - namespaceSelector:
+        matchExpressions:
+        - key: app.kubernetes.io/part-of
+          operator: In
+          values:
+          - kubeflow-profile
+    - namespaceSelector:
+        matchExpressions:
+        - key: kubernetes.io/metadata.name
+          operator: In
+          values:
+          - istio-system
+    - podSelector: {}
+  podSelector:
+    matchExpressions:
+    - key: app
+      operator: In
+      values:
+      - seaweedfs
+  policyTypes:
+  - Ingress
diff --git a/contrib/seaweedfs/base/seaweedfs-deployment.yaml b/contrib/seaweedfs/base/seaweedfs-deployment.yaml
new file mode 100644
index 0000000000..002c4eef9d
--- /dev/null
+++ b/contrib/seaweedfs/base/seaweedfs-deployment.yaml
@@ -0,0 +1,65 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: seaweedfs
+  namespace: kubeflow
+  labels:
+    app: seaweedfs
+spec:
+  selector:
+    matchLabels:
+      app: seaweedfs
+  strategy:
+    type: Recreate
+  # Single container setup not scalable
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: seaweedfs
+    spec:
+      containers:
+      - name: seaweedfs
+        image: 'chrislusf/seaweedfs:3.69'
+        args:
+        - 'server'
+        - '-dir=/data'
+        - '-s3'
+        ports:
+        - containerPort: 8333
+        readinessProbe:
+          httpGet:
+            path: /status
+            port: 8333
+            scheme: HTTP
+          initialDelaySeconds: 15
+          periodSeconds: 15
+          successThreshold: 1
+          failureThreshold: 100
+          timeoutSeconds: 10
+        securityContext: # Using restricted profile
+          allowPrivilegeEscalation: false
+          privileged: false
+          runAsNonRoot: true
+          # image defaults to root user
+          runAsUser: 1001
+          runAsGroup: 1001
+          seccompProfile:
+            type: RuntimeDefault
+          capabilities:
+            drop:
+            - ALL
+            add:
+            - NET_BIND_SERVICE
+        volumeMounts:
+        - mountPath: /data
+          name: data
+        resources:
+          # Benchmark this, just taken from minio
+          requests:
+            cpu: 20m
+            memory: 100Mi
+      volumes:
+      - name: data
+        persistentVolumeClaim:
+          claimName: seaweedfs-pvc
diff --git a/contrib/seaweedfs/base/seaweedfs-pvc.yaml b/contrib/seaweedfs/base/seaweedfs-pvc.yaml
new file mode 100644
index 0000000000..b0302f9cb7
--- /dev/null
+++ b/contrib/seaweedfs/base/seaweedfs-pvc.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: seaweedfs-pvc
+  namespace: kubeflow
+spec:
+  accessModes:
+  - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
diff --git a/contrib/seaweedfs/base/seaweedfs-service.yaml b/contrib/seaweedfs/base/seaweedfs-service.yaml
new file mode 100644
index 0000000000..d44ba3e614
--- /dev/null
+++ b/contrib/seaweedfs/base/seaweedfs-service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: minio-service
+  namespace: kubeflow
+spec:
+  ports:
+  - name: http
+    port: 9000
+    protocol: TCP
+    targetPort: 8333
+  selector:
+    app: seaweedfs
diff
--git a/contrib/seaweedfs/istio/istio-authorization-policy.yaml b/contrib/seaweedfs/istio/istio-authorization-policy.yaml
new file mode 100644
index 0000000000..409b2c1b4c
--- /dev/null
+++ b/contrib/seaweedfs/istio/istio-authorization-policy.yaml
@@ -0,0 +1,30 @@
+apiVersion: security.istio.io/v1beta1
+kind: AuthorizationPolicy
+metadata:
+  name: seaweedfs-service
+spec:
+  action: ALLOW
+  selector:
+    matchLabels:
+      app: seaweedfs
+  rules:
+  - from:
+    - source:
+        principals:
+        - cluster.local/ns/kubeflow/sa/ml-pipeline
+  - from:
+    - source:
+        principals:
+        - cluster.local/ns/kubeflow/sa/ml-pipeline-ui
+  # Allow traffic from User Pipeline Pods, which don't have a sidecar.
+  - {}
+---
+apiVersion: "networking.istio.io/v1alpha3"
+kind: DestinationRule
+metadata:
+  name: ml-pipeline-seaweedfs
+spec:
+  host: minio-service.kubeflow.svc.cluster.local
+  trafficPolicy:
+    tls:
+      mode: ISTIO_MUTUAL
diff --git a/contrib/seaweedfs/istio/kustomization.yaml b/contrib/seaweedfs/istio/kustomization.yaml
new file mode 100644
index 0000000000..2dffd5d150
--- /dev/null
+++ b/contrib/seaweedfs/istio/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: kubeflow
+
+resources:
+- ../base/
+- istio-authorization-policy.yaml
diff --git a/contrib/seaweedfs/test.sh b/contrib/seaweedfs/test.sh
new file mode 100755
index 0000000000..f55ca2135e
--- /dev/null
+++ b/contrib/seaweedfs/test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -xe
+
+# Cleanup run on every exit (including Ctrl-C on the port-forward): print the
+# seaweedfs logs, remove the manifests and restore the original minio-service.
+function trap_handler {
+  kubectl -n kubeflow logs -l app=seaweedfs --tail=100 || true
+  kustomize build istio/ | kubectl delete -f - || true
+  # The backup is empty if minio-service had no last-applied-configuration annotation.
+  if [ -s svc-minio-service-backup.json ]; then
+    kubectl apply -f svc-minio-service-backup.json
+  fi
+}
+
+kubectl create ns kubeflow || echo "namespace kubeflow already exists"
+kubectl get -n kubeflow svc minio-service -o=jsonpath='{.metadata.annotations.kubectl\.kubernetes\.io/last-applied-configuration}' > svc-minio-service-backup.json
+
+# Register the cleanup before the cluster is modified. It must not come after the
+# port-forward below, which blocks until it is interrupted.
+trap trap_handler EXIT
+
+kustomize build istio/ | kubectl apply --server-side -f -
+kubectl -n kubeflow wait --for=condition=available --timeout=600s deploy/seaweedfs
+kubectl -n kubeflow exec deployments/seaweedfs -c seaweedfs -- sh -c "echo \"s3.configure -user minio -access_key minio -secret_key minio123 -actions Read,Write,List -apply\" | /usr/bin/weed shell"
+
+# port-forward runs in the foreground, so announce the endpoint before starting it.
+echo "S3 endpoint available on localhost:8333"
+kubectl -n kubeflow port-forward svc/minio-service 8333:9000