Skip to content

Commit

Permalink
Merge pull request #14 from plazi/add_transform_all
Browse files Browse the repository at this point in the history
Add transform all
  • Loading branch information
nleanba authored Mar 4, 2024
2 parents d5db0ab + 4bcec72 commit c0b51e4
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 127 deletions.
25 changes: 15 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,35 @@ This Docker Image exposes a server on port `4505` which:
This webserver also exposes the following paths:

- `/status`: Serves a Badge (svg) to show the current pipeline status
- `/logs`: List of logs of past runs
- `/logs/[id]`: Log of past run with that id.
- `/update?from=[from-commit-id]&till=[till-commit-id]`: send a `POST` here to update all files modified since from-commit-id up till-commit-id or HEAD if not specified
- `/full_update`: send a `POST` here to run the full_update script. (Not
implemented yet, continue using the scripts in the "manual run" directory)
- `/workdir/jobs/`: List of runs
- `/workdir/jobs/[id]/status.json`: Status of run with that id
- `/workdir/jobs/[id]/log.txt`: Log of run with that id
- `/update?from=[from-commit-id]&till=[till-commit-id]`: send a `POST` here to
update all files modified since from-commit-id up to till-commit-id, or up to
HEAD if till-commit-id is not specified
- `/full_update`: send a `POST` here to run the full_update script. Note that
this will not delete any files (yet).

## Usage

Build as a docker container.

```sh
docker build . -t gg2rdf
```

Requires the environment variable `GHTOKEN` as `username:<personal-acces-token>`
to authenticate pushing to the target repo.
Requires the environment variable `GHTOKEN` as
`username:<personal-acces-token>` to authenticate pushing to the
target repo.

Then run using a volume

```sh
docker run --name gg2rdf --env GHTOKEN=username:<personal-acces-token> -p 4505:4505 -v gg2rdf:/app/workdir gg2rdf
```

Exposes port `4505`.



### Docker-Compose

```yml
Expand All @@ -57,7 +61,8 @@ Edit the file `config/config.ts`. Should be self-explanatory what goes where.

## Development

The repo comes with vscode devcontainer configurations. These include some tweaks to allow using git from inside the devcontainer.
The repo comes with vscode devcontainer configurations. These include some
tweaks to allow using git from inside the devcontainer.

To start from the terminal in vscode:

Expand Down
28 changes: 0 additions & 28 deletions manual run/main-on-all-wrapper.sh

This file was deleted.

55 changes: 0 additions & 55 deletions manual run/main-on-all.sh

This file was deleted.

7 changes: 0 additions & 7 deletions manual run/readme

This file was deleted.

15 changes: 10 additions & 5 deletions src/JobsDataBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@ import * as path from "https://deno.land/std@0.209.0/path/mod.ts";

export type Job = {
id: string;
from: string;
till: string;
from?: string;
till?: string;
author: {
"name": string;
"email": string;
};
files?: {
// only used for transform_all
modified?: string[];
removed?: string[];
};
};

export type JobStatus = {
Expand Down Expand Up @@ -55,13 +60,13 @@ export class JobsDataBase {
);
}

allJobs(): JobStatus[] {
allJobs(oldestFirst = false): JobStatus[] {
const jobDirs = [];
for (const jobDir of Deno.readDirSync(this.jobsDir)) {
jobDirs.push(jobDir);
}
return jobDirs.filter((entry) => entry.isDirectory).sort((a, b) =>
b.name.localeCompare(a.name)
oldestFirst ? a.name.localeCompare(b.name) : b.name.localeCompare(a.name)
).map((jobDir) => {
const statusFile = path.join(this.jobsDir, jobDir.name, "status.json");
try {
Expand Down Expand Up @@ -93,6 +98,6 @@ export class JobsDataBase {
}).filter(notEmpty);
}
pendingJobs() {
return this.allJobs().filter((js) => js.status === "pending");
return this.allJobs(true).filter((js) => js.status === "pending");
}
}
98 changes: 85 additions & 13 deletions src/action_worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
 * The jobs are accepted as messages and stored on disk. When the worker is started, uncompleted jobs are picked up and executed.
*/
import { path } from "./deps.ts";
import { path, walk } from "./deps.ts";
import { config } from "../config/config.ts";
import { createBadge } from "./log.ts";
import { type Job, JobsDataBase } from "./JobsDataBase.ts";
Expand All @@ -20,10 +20,14 @@ let isRunning = false;
await startTask();

self.onmessage = (evt) => {
const job = evt.data as Job;
queue.addJob(job);
if (!isRunning) startTask();
else console.log("Already running");
const job = evt.data as Job | "FULLUPDATE";
if (job === "FULLUPDATE") {
gatherJobsForFullUpdate();
} else {
queue.addJob(job);
if (!isRunning) startTask();
else console.log("Already running");
}
};

async function startTask() {
Expand All @@ -35,7 +39,59 @@ async function startTask() {
}
}

async function run() {
/**
 * Creates the jobs for a full update: pulls the source repository, collects
 * every `.xml` file below `${config.workDir}/repo/source/` and enqueues one job
 * per batch of files, each job carrying an explicit list of modified files.
 *
 * Job ids have the form `<ISO date> full update [NNNN of MMMM]`, where `MMMM`
 * is the total number of batches and is therefore only appended once the walk
 * has finished.
 *
 * `isRunning` is set while the jobs are gathered; afterwards (on success or
 * failure) it is reset and `startTask()` is called. Errors are logged to the
 * console and not rethrown.
 */
async function gatherJobsForFullUpdate() {
  // Maximum number of files handled by a single job.
  const filesPerJob = 500;
  const sourceRoot = `${config.workDir}/repo/source/`;

  isRunning = true;
  try {
    console.log("gathering jobs for full update");
    updateLocalData("source");
    const date = (new Date()).toISOString();
    let block = 0;
    const jobs: Job[] = [];
    let files: string[] = [];

    // Turns the files collected so far into a job and starts a new batch.
    const flushBatch = () => {
      jobs.push({
        author: {
          name: "GG2RDF Service",
          email: "gg2rdf@plazi.org",
        },
        id: `${date} full update [${
          (++block).toString(10).padStart(4, "0")
        }`,
        files: {
          modified: files,
        },
      });
      files = [];
    };

    for await (
      const walkEntry of walk(sourceRoot, {
        exts: ["xml"],
        includeDirs: false,
        includeSymlinks: false,
      })
    ) {
      if (walkEntry.isFile && walkEntry.path.endsWith(".xml")) {
        // Store paths relative to the source repository root.
        files.push(walkEntry.path.replace(sourceRoot, ""));
        if (files.length >= filesPerJob) flushBatch();
      } else {
        console.log("skipped", walkEntry.path);
      }
    }

    // The last batch is usually only partially filled. Without this flush its
    // files (or all files, if there are fewer than `filesPerJob`) would never
    // be part of any job.
    if (files.length > 0) flushBatch();

    // The total number of batches is only known now, so complete the ids here.
    jobs.forEach((j) => {
      j.id += ` of ${block.toString(10).padStart(4, "0")}]`;
      queue.addJob(j);
    });
    console.log(`succesfully created full-update jobs (${block} jobs)`);
  } catch (error) {
    console.error("Could not create full-update jobs\n" + error);
  } finally {
    isRunning = false;
    startTask();
  }
}

function run() {
while (queue.pendingJobs().length > 0) {
const jobStatus = queue.pendingJobs()[0];
const job = jobStatus.job;
Expand All @@ -45,12 +101,28 @@ async function run() {
});
};
try {
log("Starting transformation" + JSON.stringify(job, undefined, 2));

const files = getModifiedAfter(job.from, job.till, log);

const modified = [...files.added, ...files.modified];
const removed = files.removed;
log("Starting transformation\n" + JSON.stringify(job, undefined, 2));

let modified: string[] = [];
let removed: string[] = [];
let message = "";

if (job.files) {
modified = job.files.modified || [];
removed = job.files.removed || [];
message =
`committed by action runner ${config.sourceRepository} ${job.id}`;
} else if (job.from) {
const files = getModifiedAfter(job.from, job.till, log);
modified = [...files.added, ...files.modified];
removed = files.removed;
message =
`committed by action runner ${config.sourceRepository}@${job.id}`;
} else {
throw new Error(
"Could not start job, neither explicit file list nor from-commit specified",
);
}

updateLocalData("source", log);

Expand Down Expand Up @@ -126,7 +198,7 @@ async function run() {
const gitCommands = `git config --replace-all user.name ${job.author.name}
git config --replace-all user.email ${job.author.email}
git add -A
git commit --quiet -m "committed by action runner ${config.sourceRepository}@${job.id}"
git commit --quiet -m "${message}"
git push --quiet ${
config.targetRepositoryUri.replace(
"https://",
Expand Down
2 changes: 1 addition & 1 deletion src/deps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export {
serveFile,
} from "https://deno.land/std@0.202.0/http/file_server.ts";

export { existsSync } from "https://deno.land/std@0.202.0/fs/mod.ts";
export { existsSync, walk } from "https://deno.land/std@0.202.0/fs/mod.ts";
export * as path from "https://deno.land/std@0.209.0/path/mod.ts";

export { parseArgs } from "https://deno.land/std@0.215.0/cli/parse_args.ts";
Expand Down
2 changes: 1 addition & 1 deletion src/gg2rdf.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DOMParser, parseArgs } from "./deps.ts";
import { Element } from "https://esm.sh/v135/linkedom@0.16.8/types/interface/element.d.ts";
import type { Element } from "https://esm.sh/v135/linkedom@0.16.8/types/interface/element.d.ts";

class Subject {
properties: { [key: string]: Set<string> } = {};
Expand Down
18 changes: 12 additions & 6 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,14 @@ const WEBHOOK_SECRET: string | undefined = Deno.env.get("WEBHOOK_SECRET");
await Deno.mkdir(`${config.workDir}/repo`, { recursive: true });
await Deno.mkdir(`${config.workDir}/tmprdf`, { recursive: true });
await Deno.mkdir(`${config.workDir}/tmpttl`, { recursive: true });
await createBadge("Unknown");

const db = new JobsDataBase(`${config.workDir}/jobs`);
const latest =
db.allJobs().find((j) => j.status === "completed" || j.status === "failed")
?.status || "Unknown";
if (latest === "failed") createBadge("Failed");
else if (latest === "completed") createBadge("OK");
else createBadge("Unknown");

const worker = new Worker(
new URL("./action_worker.ts", import.meta.url).href,
Expand Down Expand Up @@ -78,10 +85,10 @@ const webhookHandler = async (request: Request) => {
}
if (pathname === "/full_update") {
console.log("· got full_update request");
// TODO
return new Response("Not Implemented", {
status: Status.NotImplemented,
statusText: STATUS_TEXT[Status.NotImplemented],
worker.postMessage("FULLUPDATE");
return new Response(undefined, {
status: Status.Accepted,
statusText: STATUS_TEXT[Status.Accepted],
});
} else {
if (WEBHOOK_SECRET && !(await verifySignature(request))) {
Expand Down Expand Up @@ -136,7 +143,6 @@ const webhookHandler = async (request: Request) => {
response.headers.set("Content-Type", "image/svg+xml");
return response;
} else if (pathname === "/jobs.json") {
const db = new JobsDataBase(`${config.workDir}/jobs`);
const json = JSON.stringify(db.allJobs(), undefined, 2);
const response = new Response(json);
response.headers.set("Content-Type", "application/json");
Expand Down
3 changes: 2 additions & 1 deletion src/repoActions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ const cloneRepo = (which: "source" | "target", log = console.log) => {
// Function to update local data
export function updateLocalData(
which: "source" | "target",
log = console.log,
log: (msg: string) => void = console.log,
) {
log("starting git pull...");
Deno.mkdirSync(`${config.workDir}/repo/${which}/.git`, { recursive: true });
const p = new Deno.Command("git", {
args: ["pull"],
Expand Down

0 comments on commit c0b51e4

Please sign in to comment.