From e2855a150413b33354e46102cc4e386815907292 Mon Sep 17 00:00:00 2001
From: Lucian Carata <lcr@bitrunes.org>
Date: Tue, 5 Nov 2024 15:42:35 +0000
Subject: [PATCH] feat(k6): add scenario with multiple stages ramping up/down
 RPS

The added scenario allows one to configure an arbitrary number of stages,
with each consisting of a linear ramp-up/down to the desired rate and a
hold/plateau time.

Within each stage, the target rate is taken from the matching element of the
`REQUEST_RATES` environment variable, and the time for which that rate is held
constant from the matching element of `CONSTANT_RATE_DURATIONS_SECONDS` (both
are comma-separated lists). The ramp-up/down preceding each hold lasts one
third of the hold time, rounded up to a whole second.
---
 tests/k6/components/settings.js            | 47 +++++++++++++-
 tests/k6/configs/k8s/base/k6.yaml          | 19 ++++++
 tests/k6/scenarios/infer_multiple_rates.js | 73 ++++++++++++++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 tests/k6/scenarios/infer_multiple_rates.js

diff --git a/tests/k6/components/settings.js b/tests/k6/components/settings.js
index 503efcc2c1..e9a214cdbb 100644
--- a/tests/k6/components/settings.js
+++ b/tests/k6/components/settings.js
@@ -238,11 +238,34 @@ function doWarmup() {
 
 function requestRate() {
     if (__ENV.REQUEST_RATE) {
-        return __ENV.REQUEST_RATE
+        return parseInt(__ENV.REQUEST_RATE) // return an integer, like the numeric default below, rather than the raw env value
     }
     return 10
 }
 
+function requestRates() {
+    // the comma-separated REQUEST_RATES list wins; otherwise wrap the single requestRate() value
+    const rawRates = __ENV.REQUEST_RATES
+    if (!rawRates) return [requestRate()]
+    return rawRates.split(",").map(entry => parseInt(entry))
+}
+
+function rateStages() {
+    if (!__ENV.REQUEST_RATES) {
+        // no rate list configured: a single stage at the default rate
+        return [{target: requestRate(), duration: constantRateDurationSeconds().toString()+'s'}]
+    }
+    const holdSeconds = constantRateDurationsSeconds()
+    const stages = []
+    requestRates().forEach((rate, idx) => {
+        // ramp to the rate over a third of the hold time (rounded up), then hold it
+        const rampSeconds = Math.ceil(holdSeconds[idx]/3)
+        stages.push({target: rate, duration: rampSeconds.toString()+'s'})
+        stages.push({target: rate, duration: holdSeconds[idx].toString()+'s'})
+    })
+    return stages
+}
+
 function constantRateDurationSeconds() {
     if (__ENV.CONSTANT_RATE_DURATION_SECONDS) {
         return __ENV.CONSTANT_RATE_DURATION_SECONDS
@@ -250,6 +273,25 @@ function constantRateDurationSeconds() {
     return 30
 }
 
+// Per-stage hold durations in seconds: one entry per rate returned by requestRates().
+function constantRateDurationsSeconds() {
+    const numStages = requestRates().length
+    if (__ENV.CONSTANT_RATE_DURATIONS_SECONDS) {
+        const durations = __ENV.CONSTANT_RATE_DURATIONS_SECONDS.split(",").map(s => parseInt(s, 10))
+        if (durations.length >= numStages) {
+            // surplus durations without a matching rate are dropped
+            return durations.slice(0, numStages)
+        }
+        // too few durations: pad with the last value, then return the padded list
+        const last = durations[durations.length - 1]
+        while (durations.length < numStages) {
+            durations.push(last)
+        }
+        return durations
+    }
+    return new Array(numStages).fill(constantRateDurationSeconds() / numStages)
+}
+
 function podNamespace() {
     if (__ENV.NAMESPACE) {
         return __ENV.NAMESPACE
@@ -349,7 +391,10 @@ export function getConfig() {
         "inferType" : inferType(),
         "doWarmup": doWarmup(),
         "requestRate": requestRate(),
+        "requestRates": requestRates(),
         "constantRateDurationSeconds": constantRateDurationSeconds(),
+        "constantRateDurationsSeconds": constantRateDurationsSeconds(),
+        "rateStages": rateStages(),
         "modelReplicas": modelReplicas(),
         "maxModelReplicas": maxModelReplicas(),
         "namespace":  podNamespace(),
diff --git a/tests/k6/configs/k8s/base/k6.yaml b/tests/k6/configs/k8s/base/k6.yaml
index 892c4435dd..9ff2108ea7 100644
--- a/tests/k6/configs/k8s/base/k6.yaml
+++ b/tests/k6/configs/k8s/base/k6.yaml
@@ -36,6 +36,15 @@ spec:
         #   "csv=results/base.gz",
         #   "scenarios/infer_constant_rate.js",
         #   ]
+        # # infer_multiple_rates
+        # args: [
+        #   "--no-teardown",
+        #   "--summary-export",
+        #   "results/base.json",
+        #   "--out",
+        #   "csv=results/base.gz",
+        #   "scenarios/infer_multiple_rates.js",
+        #   ]
         # # k8s-test-script
         # args: [
         #   "--summary-export",
@@ -61,8 +70,18 @@ spec:
         #   "scenarios/core2_qa_control_plane_ops.js",
         #   ]
         env:
+        - name: REQUEST_RATE
+          value: "20"
+        - name: REQUEST_RATES
+          value: "10,70,10,50,20"
+        - name: CONSTANT_RATE_DURATION_SECONDS
+          value: "60"
+        - name: CONSTANT_RATE_DURATIONS_SECONDS
+          value: "120,120,400,120,400"
         - name: USE_KUBE_CONTROL_PLANE
           value: "true"
+        # - name: SKIP_UNLOAD_MODEL
+        #   value: "true"
         - name: SCHEDULER_ENDPOINT
           value: "${SCHEDULER_ENDPOINT}:9004"
         - name: INFER_HTTP_ITERATIONS
diff --git a/tests/k6/scenarios/infer_multiple_rates.js b/tests/k6/scenarios/infer_multiple_rates.js
new file mode 100644
index 0000000000..e2e4df1252
--- /dev/null
+++ b/tests/k6/scenarios/infer_multiple_rates.js
@@ -0,0 +1,73 @@
+import { getConfig } from '../components/settings.js'
+import { doInfer, setupBase, teardownBase, getVersionSuffix } from '../components/utils.js'
+
+export const options = { // k6 options: a single ramping-arrival-rate scenario driven by the configured rate stages
+    thresholds: { // NOTE(review): these filters select scenario:default, but the only scenario defined below is ramping_request_rates - confirm they match any samples
+        'http_req_duration{scenario:default}': [`max>=0`],
+        'http_reqs{scenario:default}': [],
+        'grpc_req_duration{scenario:default}': [`max>=0`],
+        'data_received{scenario:default}': [],
+        'data_sent{scenario:default}': [],
+    },
+    scenarios: {
+        ramping_request_rates: {
+            startTime: '0s',
+            executor: 'ramping-arrival-rate',
+            startRate: 5, // presumably the arrival rate (per timeUnit) in effect when the scenario starts - see k6 executor docs
+            timeUnit: '1s',
+            preAllocatedVUs: 50, // how large the initial pool of VUs would be
+            maxVUs: 1000, // if the preAllocatedVUs are not enough, we can initialize more
+            stages: getConfig().rateStages, // evaluated at module load (top-level initializer), not inside setup()
+        },
+    },
+    setupTimeout: '6000s',
+    teardownTimeout: '6000s',
+};
+
+export function setup() {
+    // the returned config is what the default function and teardown() take as their argument
+    const cfg = getConfig()
+    setupBase(cfg)
+    const { rateStages } = getConfig()
+    console.log("rate stages:", rateStages)
+    return cfg
+}
+
+/**
+ * Per-iteration entry point: issues a single doInfer() call against a randomly
+ * chosen model.
+ *
+ * The model type is drawn from those whose maxNumModels entry is non-zero, the
+ * model id from [0, maxNumModels) for that type, and the protocol from whichever
+ * of REST/gRPC has a non-zero iteration setting (random pick when both do).
+ *
+ * @param {object} config - the configuration produced by setup() in this file
+ * @throws {Error} when both REST and gRPC are disabled
+ */
+export default function (config) {
+    // model types with no models configured are never candidates
+    const typeIdxs = Array.from({ length: config.modelType.length }, (_, i) => i)
+    const candidateIdxs = typeIdxs.filter(i => config.maxNumModels[i] !== 0)
+    const idx = candidateIdxs[Math.floor(Math.random() * candidateIdxs.length)]
+
+    const modelId = Math.floor(Math.random() * config.maxNumModels[idx])
+    const modelName = config.modelNamePrefix[idx] + modelId.toString()
+    const modelNameWithVersion = modelName + getVersionSuffix(config.isSchedulerProxy)  // first version
+
+    const restEnabled = Boolean(Number(config.inferHttpIterations))
+    const grpcEnabled = Boolean(Number(config.inferGrpcIterations))
+    if (!restEnabled && !grpcEnabled) {
+        throw new Error('Both REST and GRPC protocols are disabled!')
+    }
+
+    // true selects REST, false selects gRPC; flip a coin only when both are enabled
+    let useRest = restEnabled
+    if (restEnabled && grpcEnabled) {
+        useRest = Math.random() > 0.5
+    }
+    doInfer(modelName, modelNameWithVersion, config, useRest, idx)
+}
+
+export function teardown(config) { // forwards the config it is given to the shared teardownBase helper
+    teardownBase(config)
+}