From e69367d614afa5b001f1f6bb8790620a1da160e7 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Wed, 17 Apr 2024 12:13:59 +0100 Subject: [PATCH] allocrunner: automatically stop batch type allocations. Previously, when running batch type jobs (batch/sysbatch), the allocs would run until the user initiated shutdown. This is not how batch workloads behave, and it caused load testing problems as nodes would fill with running workloads. This problem can be overcome by using the "real" allocrunner, but doing so adds resource overhead to the nodesim process. Automatically stopping batch workloads mimics real work and allows us to use the simulated allocrunner for a lighter overhead. This means benchmarking can be cheaper and easier to run. --- allocrunnersim/allocrunnersim.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/allocrunnersim/allocrunnersim.go b/allocrunnersim/allocrunnersim.go index 42e2e40..bef0009 100644 --- a/allocrunnersim/allocrunnersim.go +++ b/allocrunnersim/allocrunnersim.go @@ -174,6 +174,24 @@ func (ar *simulatedAllocRunner) Run() { ar.allocStateLock.Unlock() ar.updateAllocAndSendUpdate(taskStates) + + // Who wants to live forever? + // + // Batch and sysbatch jobs certainly don't, so after a little pause to + // simulate the allocation has done something, we stop it. This is useful + // for load testing, as we can continually dispatch jobs, without nodes + // becoming resource exhausted and still use the lighter-weight simulated + // alloc-runner. + // + // Other job types which are meant to run forever, must be stopped by a + // user initiated command. + switch ar.alloc.Job.Type { + case structs.JobTypeBatch, structs.JobTypeSysBatch: + go func() { + time.Sleep(5 * time.Second) + ar.stopAll() + }() + } } // updateAllocAndSendUpdate is a small helper that builds a new allocation