Skip to content

Commit

Permalink
Handle failed start
Browse files Browse the repository at this point in the history
  • Loading branch information
patuwwy committed Jan 23, 2024
1 parent 891fa75 commit e2eec2e
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 216 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"lint": "TIMING=1 NODE_OPTIONS=\"--max-old-space-size=2048\" scripts/run-script.js -w modules -j 4 -e \"! ls .eslintrc* > /dev/null || npx eslint ./ --ext .ts --ext .js --cache --cache-strategy=content\"",
"lint:uncached": "find . -name .eslintcache -delete && yarn lint",
"start": "DEVELOPMENT=true node dist/sth/bin/hub.js",
"start:dev": "DEVELOPMENT=true ts-node packages/sth/src/bin/hub.ts",
"start:dev": "ts-node packages/sth/src/bin/hub.ts",
"start:dev:cli": "DEVELOPMENT=true ts-node packages/cli/src/bin/index.ts",
"install:clean": "yarn clean && yarn clean:modules && yarn install",
"postinstall": "scripts/run-script.js -v -w modules install:deps",
Expand Down
14 changes: 13 additions & 1 deletion packages/adapters/src/process-instance-adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class ProcessInstanceAdapter implements
sthConfig: STHConfiguration;

processPID: number = -1;
exitCode = -1;
id?: string | undefined;

private runnerProcess?: ChildProcess;
Expand Down Expand Up @@ -171,6 +172,11 @@ class ProcessInstanceAdapter implements

runnerProcess.unref();

// Record the Runner's exit code on the adapter as soon as the process ends.
runnerProcess.on("exit", (code) => {
    // `code` is null when the process was terminated by a signal; only that case falls back to -1.
    // `||` must not be used here: it would also replace a successful exit code 0 with -1,
    // making a cleanly finished process indistinguishable from one that has not exited yet.
    this.exitCode = code ?? -1;
    this.logger.info("Runner exit code", code);
});

this.crashLogStreams = Promise.all([runnerProcess.stdout, runnerProcess.stderr].map(streamToString));

this.runnerProcess = runnerProcess;
Expand All @@ -187,7 +193,13 @@ class ProcessInstanceAdapter implements
async waitUntilExit(_config: InstanceConfig, _instanceId: string, _sequenceInfo: SequenceInfo): Promise<ExitCode> {
if (this.runnerProcess) {
const [statusCode, signal] = await new Promise<[number | null, NodeJS.Signals | null]>(
(res) => this.runnerProcess?.on("exit", (code, sig) => res([code, sig]))
(res) => {
if (this.exitCode > -1) {
res([this.exitCode, null]);
}

this.runnerProcess?.on("exit", (code, sig) => res([code, sig]));
}
);

this.logger.trace("Runner process exited", this.runnerProcess?.pid);
Expand Down
75 changes: 4 additions & 71 deletions packages/host/src/lib/csi-controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {
MessageUtilities
} from "@scramjet/model";
import { development } from "@scramjet/sth-config";
import { CommunicationChannel as CC, RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols";
import { CommunicationChannel as CC, RunnerMessageCode } from "@scramjet/symbols";
import {
APIRoute,
AppConfig,
Expand Down Expand Up @@ -45,6 +45,7 @@ import { ObjLogger } from "@scramjet/obj-logger";
import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect";
import { cancellableDefer, CancellablePromise, defer, promiseTimeout, TypedEmitter } from "@scramjet/utility";
import { ReasonPhrases } from "http-status-codes";
import { mapRunnerExitCode } from "./utils";

Check failure on line 48 in packages/host/src/lib/csi-controller.ts

View workflow job for this annotation

GitHub Actions / build-sth / Build STH Packages Nodejs 18.x

Cannot find module './utils' or its corresponding type declarations.

Check failure on line 48 in packages/host/src/lib/csi-controller.ts

View workflow job for this annotation

GitHub Actions / build-sth / Build STH Packages Nodejs 18.x

Cannot find module './utils' or its corresponding type declarations.

Check failure on line 48 in packages/host/src/lib/csi-controller.ts

View workflow job for this annotation

GitHub Actions / analyze-code / Analyze the source code

Unable to resolve path to module './utils'

Check failure on line 48 in packages/host/src/lib/csi-controller.ts

View workflow job for this annotation

GitHub Actions / build-docker-runner-image / Build runner docker image (Nodejs 18.x)

Cannot find module './utils' or its corresponding type declarations.

Check failure on line 48 in packages/host/src/lib/csi-controller.ts

View workflow job for this annotation

GitHub Actions / build-docker-runner-image / Build runner docker image (Nodejs 18.x)

Cannot find module './utils' or its corresponding type declarations.

/**
* @TODO: Runner exits after 10secs and k8s client checks status every 500ms so we need to give it some time
Expand Down Expand Up @@ -239,7 +240,7 @@ export class CSIController extends TypedEmitter<Events> {

async main() {
this.status = InstanceStatus.RUNNING;
this.logger.trace("Instance started", this.status);
this.logger.trace("Main. Current status:", this.status);

let code = -1;

Expand Down Expand Up @@ -315,7 +316,7 @@ export class CSIController extends TypedEmitter<Events> {
};

this.instancePromise = instanceMain()
.then((exitcode) => this.mapRunnerExitCode(exitcode))
.then((exitcode) => mapRunnerExitCode(exitcode, this.sequence))
.catch((error) => {
this.logger.error("Instance promise rejected", error);
this.initResolver?.rej(error);
Expand All @@ -336,74 +337,6 @@ export class CSIController extends TypedEmitter<Events> {
});
}

/**
 * Converts the exit code reported by the Runner into the final Instance outcome.
 *
 * The known `RunnerExitCode` failure values and any other positive code yield a
 * rejected promise with status `InstanceStatus.ERRORED`. `KILLED`, `STOPPED` and
 * every remaining code yield a resolved promise with status `InstanceStatus.COMPLETED`.
 *
 * @param exitcode Exit code to translate.
 * @returns Promise settled with `{ message, exitcode, status }`; the same object shape
 * is used for the resolved value and for the rejection reason.
 */
// eslint-disable-next-line complexity
private mapRunnerExitCode(exitcode: number): Promise<
    { message: string, exitcode: number, status: InstanceStatus }
> {
    type Outcome = { message: string, exitcode: number, status: InstanceStatus };

    // Failure outcome: rejects with a plain object (not an Error), as callers expect.
    const errored = (message: string, code: number): Promise<Outcome> =>
        Promise.reject({ message, exitcode: code, status: InstanceStatus.ERRORED });

    // Success outcome: resolves with the same object shape.
    const completed = (message: string, code: number): Promise<Outcome> =>
        Promise.resolve({ message, exitcode: code, status: InstanceStatus.COMPLETED });

    if (exitcode === RunnerExitCode.INVALID_ENV_VARS) {
        return errored(
            "Runner was started with invalid configuration. This is probably a bug in STH.",
            RunnerExitCode.INVALID_ENV_VARS
        );
    }

    if (exitcode === RunnerExitCode.PODS_LIMIT_REACHED) {
        return errored("Instance limit reached", RunnerExitCode.PODS_LIMIT_REACHED);
    }

    if (exitcode === RunnerExitCode.INVALID_SEQUENCE_PATH) {
        // The entrypoint path is read only in this branch, exactly as in the switch-based version.
        const entrypoint = this.sequence.config.entrypointPath;

        return errored(
            `Sequence entrypoint path ${entrypoint} is invalid. ` +
            "Check `main` field in Sequence package.json",
            RunnerExitCode.INVALID_SEQUENCE_PATH
        );
    }

    if (exitcode === RunnerExitCode.SEQUENCE_FAILED_ON_START) {
        return errored("Sequence failed on start", RunnerExitCode.SEQUENCE_FAILED_ON_START);
    }

    if (exitcode === RunnerExitCode.SEQUENCE_FAILED_DURING_EXECUTION) {
        return errored("Sequence failed during execution", RunnerExitCode.SEQUENCE_FAILED_DURING_EXECUTION);
    }

    if (exitcode === RunnerExitCode.SEQUENCE_UNPACK_FAILED) {
        return errored("Sequence unpack failed", RunnerExitCode.SEQUENCE_UNPACK_FAILED);
    }

    if (exitcode === RunnerExitCode.KILLED) {
        return completed("Instance killed", RunnerExitCode.KILLED);
    }

    if (exitcode === RunnerExitCode.STOPPED) {
        return completed("Instance stopped", RunnerExitCode.STOPPED);
    }

    // Any other positive code is treated as a generic Runner failure.
    return exitcode > 0
        ? errored("Runner failed", exitcode)
        : completed("Instance completed", exitcode);
}

async cleanup() {
await this.instanceAdapter.cleanup();

Expand Down
Loading

0 comments on commit e2eec2e

Please sign in to comment.