Skip to content

Commit

Permalink
core: k8s (#2294)
Browse files Browse the repository at this point in the history
* core: k8s

* bump grace period to 180s

* core: graceful shutdown

* github action

* update apply_infra
  • Loading branch information
spolu authored Oct 27, 2023
1 parent e4f32a7 commit a6a60dc
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 15 deletions.
54 changes: 54 additions & 0 deletions .github/workflows/deploy-core.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Manually-triggered workflow that builds the `core` image and rolls it out to
# the `core-deployment` Kubernetes deployment.
name: Deploy Core

on:
  workflow_dispatch:

# Runs share one concurrency group; an in-progress deploy is never cancelled by
# a newer one.
concurrency:
  group: deploy_core
  cancel-in-progress: false

env:
  GCLOUD_PROJECT_ID: ${{ secrets.GCLOUD_PROJECT_ID }}

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      # The short commit sha is used below as the image tag to deploy.
      - name: Get short sha
        id: short_sha
        run: echo "short_sha=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: "Authenticate with Google Cloud"
        uses: "google-github-actions/auth@v1"
        with:
          credentials_json: "${{ secrets.GCLOUD_SA_KEY }}"

      - name: "Set up Cloud SDK"
        uses: "google-github-actions/setup-gcloud@v1"

      - name: Install gke-gcloud-auth-plugin
        run: |
          gcloud components install gke-gcloud-auth-plugin
      - name: Setup kubectl
        run: |
          gcloud container clusters get-credentials dust-kube --region us-central1
      # NOTE(review): cloud-build.sh is expected to tag the image with the same
      # short sha that is deployed in the next step - confirm against the script.
      - name: Build the image on Cloud Build
        run: |
          chmod +x ./k8s/cloud-build.sh
          ./k8s/cloud-build.sh core
      - name: Deploy the image on Kubernetes
        run: |
          chmod +x ./k8s/deploy-image.sh
          ./k8s/deploy-image.sh gcr.io/$GCLOUD_PROJECT_ID/core-image:${{ steps.short_sha.outputs.short_sha }} core-deployment
      # Fail the job if the rollout has not completed within 10 minutes.
      - name: Wait for rollout to complete
        run: |
          echo "Waiting for rollout to complete (web)"
          kubectl rollout status deployment/core-deployment --timeout=10m
12 changes: 12 additions & 0 deletions core/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Image for the `core` service, built on the official Rust 1.70.0 toolchain image.
FROM rust:1.70.0 AS core

WORKDIR /app

# Copy the whole build context into /app.
COPY . .

# Compile the release artifacts at image build time.
RUN cargo build --release

# Port the API server listens on.
EXPOSE 3001

# Set a default command, it will start the API service if no command is provided
CMD ["cargo", "run", "--release", "--bin", "dust-api"]
62 changes: 48 additions & 14 deletions core/bin/dust_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ use serde_json::{json, Value};
use std::collections::{HashMap, HashSet};
use std::convert::Infallible;
use std::sync::Arc;
use tokio::sync::mpsc::unbounded_channel;
use tokio::{
signal::unix::{signal, SignalKind},
sync::mpsc::unbounded_channel,
};
use tokio_stream::Stream;
use tower_http::trace::{self, TraceLayer};
use tracing::Level;
Expand Down Expand Up @@ -78,6 +81,23 @@ impl APIState {
run_manager.pending_apps.push((app, credentials));
}

/// Waits until the run manager has no outstanding work, so the process can exit without
/// dropping runs.
///
/// Polls every 1024ms, logging the current number of pending runs on each pass, and returns
/// once both `pending_runs` and `pending_apps` are empty. Checking `pending_apps` as well
/// covers an app that has been queued but is not yet counted in `pending_runs`
/// (NOTE(review): presumably `run_loop` is what moves apps between the two — confirm).
///
/// There is no timeout: this only returns when the queues are empty.
async fn stop_loop(&self) {
    loop {
        // The lock guard lives only inside this block so it is released before the `.await`.
        let drained = {
            let manager = self.run_manager.lock();
            utils::info(&format!(
                "[GRACEFUL] {} stop_loop pending runs",
                manager.pending_runs.len()
            ));
            manager.pending_runs.is_empty() && manager.pending_apps.is_empty()
        };
        if drained {
            break;
        }
        tokio::time::sleep(std::time::Duration::from_millis(1024)).await;
    }
}

async fn run_loop(&self) -> Result<()> {
let mut loop_count = 0;
loop {
Expand Down Expand Up @@ -121,8 +141,8 @@ impl APIState {
});
});
loop_count += 1;
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
if loop_count % (10 * 10) == 0 {
tokio::time::sleep(std::time::Duration::from_millis(4)).await;
if loop_count % 1024 == 0 {
let manager = self.run_manager.lock();
utils::info(&format!("{} pending runs", manager.pending_runs.len()));
}
Expand Down Expand Up @@ -1813,30 +1833,44 @@ fn main() {
.layer(extract::Extension(state.clone()));

// Start the APIState run loop.
let state = state.clone();
tokio::task::spawn(async move { state.run_loop().await });
let runloop_state = state.clone();
tokio::task::spawn(async move { runloop_state.run_loop().await });

let (tx1, rx1) = tokio::sync::oneshot::channel::<()>();
let (tx2, rx2) = tokio::sync::oneshot::channel::<()>();

let (tx, rx) = tokio::sync::oneshot::channel();
let srv = axum::Server::bind(&"[::]:3001".parse().unwrap())
.serve(app.into_make_service())
.with_graceful_shutdown(async {
rx.await.ok();
rx1.await.ok();
});

tokio::spawn(async move {
if let Err(e) = srv.await {
eprintln!("server error: {}", e);
utils::error(&format!("server error: {}", e));
}
utils::info("[GRACEFUL] Server stopped");
tx2.send(()).ok();
});

// Wait for `ctrl+c` and stop the server
tokio::signal::ctrl_c().await.unwrap();
println!("Ctrl+C received, stopping server...");
let _ = tx.send(());
utils::info(&format!("Current PID: {}", std::process::id()));

let mut stream = signal(SignalKind::terminate()).unwrap();
stream.recv().await;

// Gracefully shut down the server
utils::info("[GRACEFUL] SIGTERM received, stopping server...");
tx1.send(()).ok();

// Wait for the server to shutdown
utils::info("[GRACEFUL] Awaiting server shutdown...");
rx2.await.ok();

// Wait for another `ctrl+c` and exit
tokio::signal::ctrl_c().await.unwrap();
// Wait for the run loop to finish.
utils::info("[GRACEFUL] Awaiting stop loop...");
state.stop_loop().await;

utils::info("[GRACEFUL] Exiting!");
Ok::<(), anyhow::Error>(())
});
}
11 changes: 11 additions & 0 deletions core/dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Paths excluded from the Docker build context.

# presumably cargo's build output directory - confirm
target

# misc
.DS_Store
*.pem

# Local environment files
.env
.env*.local

# Docker files themselves are not needed inside the image
Dockerfile*
.dockerignore
4 changes: 3 additions & 1 deletion k8s/apply_infra.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

set -e


function apply_deployment {
# This function applies a deployment, but if the deployment already exists,
# it will replace the image with the current image to avoid a rolling update
Expand Down Expand Up @@ -57,6 +56,7 @@ kubectl apply -f "$(dirname "$0")/configmaps/connectors-edge-configmap.yaml"
kubectl apply -f "$(dirname "$0")/configmaps/blog-configmap.yaml"
kubectl apply -f "$(dirname "$0")/configmaps/docs-configmap.yaml"
kubectl apply -f "$(dirname "$0")/configmaps/alerting-temporal-configmap.yaml"
kubectl apply -f "$(dirname "$0")/configmaps/core-configmap.yaml"

echo "-----------------------------------"
echo "Applying backend configs"
Expand Down Expand Up @@ -100,6 +100,7 @@ apply_deployment blog-deployment
apply_deployment docs-deployment
apply_deployment metabase-deployment
apply_deployment alerting-temporal-deployment
apply_deployment core-deployment


echo "-----------------------------------"
Expand All @@ -114,6 +115,7 @@ kubectl apply -f "$(dirname "$0")/services/connectors-edge-service.yaml"
kubectl apply -f "$(dirname "$0")/services/blog-service.yaml"
kubectl apply -f "$(dirname "$0")/services/docs-service.yaml"
kubectl apply -f "$(dirname "$0")/services/metabase-service.yaml"
kubectl apply -f "$(dirname "$0")/services/core-service.yaml"


echo "-----------------------------------"
Expand Down
9 changes: 9 additions & 0 deletions k8s/configmaps/core-configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# ConfigMap consumed by the core deployment through `envFrom`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: core-config
data:
  # Datadog settings exposed to the container as environment variables.
  DD_ENV: "prod"
  DD_SERVICE: "core"
  DD_LOGS_INJECTION: "true"
  DD_RUNTIME_METRICS_ENABLED: "true"
54 changes: 54 additions & 0 deletions k8s/deployments/core-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Deployment running three replicas of the core API service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: core-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: core
  template:
    metadata:
      labels:
        app: core
        name: core-pod
        admission.datadoghq.com/enabled: "true"
      annotations:
        ad.datadoghq.com/web.logs: '[{"source": "core","service": "core","tags": ["env:prod"]}]'
    spec:
      # Time allowed between SIGTERM and SIGKILL, giving the server room to
      # finish pending runs during graceful shutdown.
      terminationGracePeriodSeconds: 180
      containers:
        - name: web
          # NOTE(review): the deploy workflow passes a sha-tagged image to
          # deploy-image.sh; `latest` here looks like a placeholder - confirm.
          image: gcr.io/or1g1n-186209/core-image:latest
          command: ["cargo", "run", "--release", "--bin", "dust-api"]
          imagePullPolicy: Always
          ports:
            - containerPort: 3001

          envFrom:
            - configMapRef:
                name: core-config
            - secretRef:
                name: core-secrets
          env:
            # Host IP of the node the pod is scheduled on.
            - name: DD_AGENT_HOST
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP

          volumeMounts:
            - name: service-account-volume
              mountPath: /etc/service-accounts

          # Requests equal limits for both CPU and memory.
          resources:
            requests:
              cpu: 1000m
              memory: 2.5Gi
            limits:
              cpu: 1000m
              memory: 2.5Gi

      volumes:
        - name: service-account-volume
          secret:
            secretName: gcp-service-account-secret
13 changes: 13 additions & 0 deletions k8s/services/core-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Cluster-internal Service that forwards port 80 to port 3001 on the core pods.
apiVersion: v1
kind: Service
metadata:
  name: core-service
spec:
  # Matches the labels set on the core deployment's pod template.
  selector:
    app: core
    name: core-pod
  ports:
    - protocol: TCP
      port: 80
      targetPort: 3001
  type: ClusterIP

0 comments on commit a6a60dc

Please sign in to comment.