From 5557f9a0b6cf45546fe991b009c29be10f2fbc98 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Thu, 12 Oct 2023 10:37:09 -0700 Subject: [PATCH 01/35] Implement Deno CLI for schema validator uploads --- cli/README.md | 3 + cli/openneuro.ts | 10 ++ cli/src/commands/download.ts | 6 + cli/src/commands/git-credential.test.ts | 17 +++ cli/src/commands/git-credential.ts | 68 ++++++++++ cli/src/commands/login.test.ts | 33 +++++ cli/src/commands/login.ts | 69 ++++++++++ cli/src/commands/upload.ts | 120 ++++++++++++++++++ cli/src/commands/validate.ts | 28 ++++ cli/src/deps.ts | 52 ++++++++ cli/src/error.ts | 4 + cli/src/fetchHttpStack.ts | 104 +++++++++++++++ cli/src/graphq.ts | 92 ++++++++++++++ cli/src/logger.ts | 46 +++++++ cli/src/options.ts | 49 +++++++ docker-compose.yml | 15 +++ nginx/nginx.dev.conf | 19 +++ .../src/handlers/__tests__/tusd.spec.ts | 15 +++ .../openneuro-server/src/handlers/tusd.ts | 56 ++++++++ packages/openneuro-server/src/routes.ts | 8 ++ tsconfig.json | 2 +- 21 files changed, 815 insertions(+), 1 deletion(-) create mode 100644 cli/README.md create mode 100644 cli/openneuro.ts create mode 100644 cli/src/commands/download.ts create mode 100644 cli/src/commands/git-credential.test.ts create mode 100644 cli/src/commands/git-credential.ts create mode 100644 cli/src/commands/login.test.ts create mode 100644 cli/src/commands/login.ts create mode 100644 cli/src/commands/upload.ts create mode 100644 cli/src/commands/validate.ts create mode 100644 cli/src/deps.ts create mode 100644 cli/src/error.ts create mode 100644 cli/src/fetchHttpStack.ts create mode 100644 cli/src/graphq.ts create mode 100644 cli/src/logger.ts create mode 100644 cli/src/options.ts create mode 100644 packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts create mode 100644 packages/openneuro-server/src/handlers/tusd.ts diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 000000000..61af9bc9b --- /dev/null +++ b/cli/README.md @@ -0,0 +1,3 @@ +# OpenNeuro CLI 
for Deno + +Simplified CLI for OpenNeuro implemented in Deno. Deno eliminates the need to install the CLI and allows for more code reuse with OpenNeuro's web frontend. diff --git a/cli/openneuro.ts b/cli/openneuro.ts new file mode 100644 index 000000000..168c7eb68 --- /dev/null +++ b/cli/openneuro.ts @@ -0,0 +1,10 @@ +/** + * Entrypoint for OpenNeuro CLI + */ +import { commandLine } from "./src/options.ts" + +export async function main() { + await commandLine(Deno.args) +} + +await main() diff --git a/cli/src/commands/download.ts b/cli/src/commands/download.ts new file mode 100644 index 000000000..d60e4e255 --- /dev/null +++ b/cli/src/commands/download.ts @@ -0,0 +1,6 @@ +import { Command } from "../deps.ts" + +export const download = new Command() + .name("download") + .description("Download a dataset from OpenNeuro") + .arguments(" ") diff --git a/cli/src/commands/git-credential.test.ts b/cli/src/commands/git-credential.test.ts new file mode 100644 index 000000000..7e605370f --- /dev/null +++ b/cli/src/commands/git-credential.test.ts @@ -0,0 +1,17 @@ +import { assertEquals } from "../deps.ts" +import { gitCredentialAction } from "./git-credential.ts" + +Deno.test("git-credential parses stdin correctly", async () => { + const stdin = new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + "host=staging.openneuro.org\nprotocol=https\npath=/datasets/ds000001\n", + ), + ) + controller.close() + }, + }) + const output = await gitCredentialAction(stdin, () => "token") + assertEquals(output, "username=@openneuro/cli\npassword=token\n") +}) diff --git a/cli/src/commands/git-credential.ts b/cli/src/commands/git-credential.ts new file mode 100644 index 000000000..c621bc39f --- /dev/null +++ b/cli/src/commands/git-credential.ts @@ -0,0 +1,68 @@ +import { Command, TextLineStream } from "../deps.ts" + +const prepareRepoAccess = ` + mutation prepareRepoAccess($datasetId: ID!) 
{ + prepareRepoAccess(datasetId: $datasetId) { + token + endpoint + } + } +` + +export function getRepoToken(datasetId?: string) { + /* + return client + .mutate({ + mutation: prepareRepoAccess, + variables: { + datasetId, + }, + }) + .then(({ data }) => data.prepareRepoAccess.token) + */ + return "token" +} + +/** + * Provide a git-credential helper for OpenNeuro + */ +export async function gitCredentialAction( + stdinReadable: ReadableStream = Deno.stdin.readable, + tokenGetter = getRepoToken, +) { + let pipeOutput = "" + const credential: Record = {} + // Create a stream of lines from stdin + const lineStream = stdinReadable + .pipeThrough(new TextDecoderStream()) + .pipeThrough(new TextLineStream()) + for await (const line of lineStream) { + const [key, value] = line.split("=", 2) + credential[key] = value + } + if ("path" in credential && credential.path) { + const datasetId = credential.path.split("/").pop() + const token = await tokenGetter(datasetId) + const output: Record = { + username: "@openneuro/cli", + password: token, + } + for (const key in output) { + pipeOutput += `${key}=${output[key]}\n` + } + } else { + throw new Error( + "Invalid input from git, check the credential helper is configured correctly", + ) + } + return pipeOutput +} + +export const gitCredential = new Command() + .name("git-credential") + .description( + "A git credentials helper for easier datalad or git-annex access to datasets.", + ) + .action(() => { + console.log(gitCredentialAction()) + }) diff --git a/cli/src/commands/login.test.ts b/cli/src/commands/login.test.ts new file mode 100644 index 000000000..613ed2b3d --- /dev/null +++ b/cli/src/commands/login.test.ts @@ -0,0 +1,33 @@ +import { assertEquals, assertSpyCalls, Select, stub } from "../deps.ts" +import { loginAction } from "./login.ts" + +Deno.test("login action supports non-interactive mode if all options are provided", async () => { + const SelectStub = stub(Select, "prompt", () => { + return new Promise(() => {}) + 
}) + await loginAction({ + url: "https://example.com", + token: "1234", + errorReporting: false, + }) + // Test to make sure we get here before the timeout + assertSpyCalls(SelectStub, 0) + SelectStub.restore() + localStorage.clear() +}) + +Deno.test("login action sets values in localStorage", async () => { + const loginOptions = { + url: "https://example.com", + token: "1234", + errorReporting: true, + } + await loginAction(loginOptions) + assertEquals(localStorage.getItem("url"), loginOptions.url) + assertEquals(localStorage.getItem("token"), loginOptions.token) + assertEquals( + localStorage.getItem("errorReporting"), + loginOptions.errorReporting.toString(), + ) + localStorage.clear() +}) diff --git a/cli/src/commands/login.ts b/cli/src/commands/login.ts new file mode 100644 index 000000000..ddcc7df70 --- /dev/null +++ b/cli/src/commands/login.ts @@ -0,0 +1,69 @@ +/** + * Configure credentials and other persistent settings for OpenNeuro + */ +import { Command, Confirm, Secret, Select } from "../deps.ts" +import type { CommandOptions } from "../deps.ts" +import { LoginError } from "../error.ts" + +export interface ClientConfig { + url: string + token: string + errorReporting: boolean +} + +const messages = { + url: + "URL for OpenNeuro instance to upload to (e.g. `https://openneuro.org`).", + token: "API key for OpenNeuro. See https://openneuro.org/keygen", + errorReporting: + "Enable error reporting. 
Errors and performance metrics are sent to the configured OpenNeuro instance.", +} + +/** + * Get credentials from local storage + */ +export function getConfig(): ClientConfig { + const url = localStorage.getItem("url") + const token = localStorage.getItem("token") + const errorReporting = localStorage.getItem("errorReporting") === "true" + if (url && token && errorReporting) { + const config: ClientConfig = { + url, + token, + errorReporting, + } + return config + } else { + throw new LoginError("Run `openneuro login` before upload.") + } +} + +export async function loginAction(options: CommandOptions) { + const url = options.url ? options.url : await Select.prompt({ + message: "Choose an OpenNeuro instance to use.", + options: [ + "https://openneuro.org", + "https://staging.openneuro.org", + "http://localhost:9876", + ], + }) + localStorage.setItem("url", url) + const token = options.token ? options.token : await Secret.prompt( + `Enter your API key for OpenNeuro (get an API key from ${url}/keygen).`, + ) + localStorage.setItem("token", token) + const errorReporting = options.hasOwnProperty("errorReporting") + ? options.errorReporting + : await Confirm.prompt(messages.errorReporting) + localStorage.setItem("errorReporting", errorReporting.toString()) +} + +export const login = new Command() + .name("login") + .description( + "Setup credentials for OpenNeuro. 
Set -u, -t, and -e flags to skip interactive prompts.", + ) + .option("-u, --url ", messages.url) + .option("-t, --token ", messages.token) + .option("-e, --error-reporting ", messages.errorReporting) + .action(loginAction) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts new file mode 100644 index 000000000..c7fe73bd4 --- /dev/null +++ b/cli/src/commands/upload.ts @@ -0,0 +1,120 @@ +import { FetchHttpStack } from "../fetchHttpStack.ts" +import { validateCommand } from "./validate.ts" +import { ClientConfig, getConfig } from "./login.ts" +import { logger } from "../logger.ts" +import { + Confirm, + ProgressBar, + relative, + resolve, + Tus, + Uppy, + walk, +} from "../deps.ts" +import type { CommandOptions } from "../deps.ts" + +export function readConfig(): ClientConfig { + const config = getConfig() + logger.info( + `configured with URL "${config.url}" and token "${ + config.token.slice( + 0, + 3, + ) + }...${config.token.slice(-3)}`, + ) + return config +} + +export async function uploadAction( + options: CommandOptions, + dataset_directory: string, +) { + const clientConfig = readConfig() + const dataset_directory_abs = resolve(dataset_directory) + logger.info( + `upload ${dataset_directory} resolved to ${dataset_directory_abs}`, + ) + + // TODO - call the validator here + + let datasetId = "ds001001" + if (options.dataset) { + datasetId = options.dataset + } else { + if (!options.create) { + const confirmation = await new Confirm( + "Confirm creation of a new dataset?", + ) + if (!confirmation) { + console.log("Specify --dataset to upload to an existing dataset.") + return + } + } + // TODO Create dataset here + datasetId = "ds001001" + } + // Setup upload + const uppy = new Uppy({ + id: "@openneuro/cli", + autoProceed: true, + debug: true, + }).use(Tus, { + endpoint: "http://localhost:9876/tusd/files/", + chunkSize: 64000000, // ~64MB + uploadLengthDeferred: true, + headers: { + Authorization: `Bearer ${clientConfig.token}`, + }, + 
httpStack: new FetchHttpStack(), + }) + + const progressBar = new ProgressBar({ + title: "Upload", + total: 100, + }) + progressBar.render(0) + uppy.on("progress", (progress) => { + progressBar.render(progress) + }) + + // Upload all files + for await ( + const walkEntry of walk(dataset_directory, { + includeDirs: false, + includeSymlinks: false, + }) + ) { + const file = await Deno.open(walkEntry.path) + const relativePath = relative(dataset_directory_abs, walkEntry.path) + const uppyFile = { + name: walkEntry.name, + data: file.readable.getReader(), + meta: { + datasetId, + relativePath, + }, + } + logger.debug(JSON.stringify({ name: uppyFile.name, meta: uppyFile.meta })) + uppy.addFile(uppyFile) + } +} + +/** + * Upload is validate extended with upload features + */ +export const upload = validateCommand + .name("upload") + .description("Upload a dataset to OpenNeuro") + .option("--json", "Hidden for upload usage", { hidden: true, override: true }) + .option("--filenameMode", "Hidden for upload usage", { + hidden: true, + override: true, + }) + .option("-d, --dataset", "Specify an existing dataset to update.", { + conflicts: ["create"], + }) + .option("-c, --create", "Skip confirmation to create a new dataset.", { + conflicts: ["dataset"], + }) + .action(uploadAction) diff --git a/cli/src/commands/validate.ts b/cli/src/commands/validate.ts new file mode 100644 index 000000000..c77121f14 --- /dev/null +++ b/cli/src/commands/validate.ts @@ -0,0 +1,28 @@ +import { Command } from "../deps.ts" + +export const validateCommand = new Command() + .name("bids-validator") + .description( + "This tool checks if a dataset in a given directory is compatible with the Brain Imaging Data Structure specification. 
To learn more about Brain Imaging Data Structure visit http://bids.neuroimaging.io", + ) + .arguments("") + .version("alpha") + .option("--json", "Output machine readable JSON") + .option( + "-s, --schema ", + "Specify a schema version to use for validation", + { + default: "latest", + }, + ) + .option("-v, --verbose", "Log more extensive information about issues") + .option( + "--ignoreNiftiHeaders", + "Disregard NIfTI header content during validation", + ) + .option( + "--filenameMode", + "Enable filename checks for newline separated filenames read from stdin", + ) + +export const validate = validateCommand diff --git a/cli/src/deps.ts b/cli/src/deps.ts new file mode 100644 index 000000000..9942f3063 --- /dev/null +++ b/cli/src/deps.ts @@ -0,0 +1,52 @@ +// Cliffy +export { + Command, + EnumType, +} from "https://deno.land/x/cliffy@v1.0.0-rc.3/command/mod.ts" +export { + Confirm, + Secret, + Select, +} from "https://deno.land/x/cliffy@v1.0.0-rc.3/prompt/mod.ts" +export type { + ActionHandler, + CommandOptions, +} from "https://deno.land/x/cliffy@v1.0.0-rc.3/command/mod.ts" +// bids-validator +export * as bidsValidator from "https://deno.land/x/bids_validator@v1.14.0/main.ts" +export { validateCommand } from "https://deno.land/x/bids_validator@v1.14.0/setup/options.ts" +export type { ValidatorOptions } from "https://deno.land/x/bids_validator@v1.14.0/setup/options.ts" +// Logging +export { + critical, + debug, + error, + getLogger, + handlers, + info, + Logger, + LogLevels, + setup, + warning, +} from "https://deno.land/std@0.212.0/log/mod.ts" +export * as log from "https://deno.land/std@0.212.0/log/mod.ts" +export { LogLevelNames } from "https://deno.land/std@0.212.0/log/levels.ts" +export type { LevelName } from "https://deno.land/std@0.212.0/log/mod.ts" +export { TextLineStream } from "https://deno.land/std@0.212.0/streams/mod.ts" +// File handling +export { walk } from "https://deno.land/std@0.212.0/fs/walk.ts" +export { resolve } from 
"https://deno.land/std@0.212.0/path/resolve.ts" +export { relative } from "https://deno.land/std@0.212.0/path/relative.ts" +export { join } from "https://deno.land/std@0.212.0/path/join.ts" +// Test suites +export { + assert, + assertEquals, +} from "https://deno.land/std@0.212.0/assert/mod.ts" +export { + assertSpyCalls, + returnsNext, + stub, +} from "https://deno.land/std@0.212.0/testing/mock.ts" +// Progress bars +export { default as ProgressBar } from "https://deno.land/x/progress@v1.3.9/mod.ts" diff --git a/cli/src/error.ts b/cli/src/error.ts new file mode 100644 index 000000000..91fc673da --- /dev/null +++ b/cli/src/error.ts @@ -0,0 +1,4 @@ +export class OpenNeuroCLIError extends Error {} +export class LoginError extends OpenNeuroCLIError {} +export class QueryError extends OpenNeuroCLIError {} +export class OptionError extends OpenNeuroCLIError {} diff --git a/cli/src/fetchHttpStack.ts b/cli/src/fetchHttpStack.ts new file mode 100644 index 000000000..f6d5b04a6 --- /dev/null +++ b/cli/src/fetchHttpStack.ts @@ -0,0 +1,104 @@ +/** + * tus-js-client fetch stack adapter. + * + * Useful for environments such as Deno or web workers without native XMLHttpRequest. 
+ */ +export class FetchHttpStack { + createRequest(method: string, url: string) { + return new FetchHttpRequest(method, url) + } + getName(): string { + return "FetchHttpStack" + } +} + +export class FetchHttpRequest { + #headers: Headers + #method: string + #url: string + #abortController: AbortController + #request: Request + #progressHandler?: (bytesSent: number) => void + + constructor(method: string, url: string) { + this.#abortController = new AbortController() + this.#headers = new Headers() + this.#method = method + this.#url = url + // This is a dummy request that is replaced later (for API compatibility only) + this.#request = new Request(url, { method }) + } + + getMethod() { + return this.#method + } + + getURL() { + return this.#url + } + + setHeader(header: string, value: string) { + this.#headers.set(header, value) + } + + getHeader(header: string) { + this.#headers.get(header) + } + + setProgressHandler(progressHandler: (bytesSent: number) => void): void { + this.#progressHandler = progressHandler + } + // Send the HTTP request with the provided request body. The value of the request body depends + // on the platform and what `fileReader` implementation is used. With the default `fileReader`, + // `body` can be + // - in browsers: a TypedArray, a DataView a Blob, or null. + // - in Node.js: a Buffer, a ReadableStream, or null. + async send(body: ReadableStream): Promise { + this.#request = new Request(this.#url, { + body, + signal: this.#abortController.signal, + method: this.#method, + headers: this.#headers, + }) + const response = await fetch(this.#request) + const text = await response.text() + return new FetchHttpResponse(response, text) + } + + abort(): Promise { + return new Promise((resolve) => { + this.#abortController.signal.onabort = () => { + resolve() + } + this.#abortController.abort() + }) + } + + // Return an environment specific object, e.g. the XMLHttpRequest object in browsers. 
+ async getUnderlyingObject(): Promise { + return this.#request + } +} + +export class FetchHttpResponse { + #response: Response + #body: string + constructor(response: Response, body: string) { + this.#response = response + this.#body = body + } + getStatus(): number { + return this.#response.status + } + getHeader(header: string): string | null { + return this.#response.headers.get(header) + } + getBody(): string { + return this.#body + } + + // Return an environment specific object, e.g. the XMLHttpRequest object in browsers. + getUnderlyingObject(): Response { + return this.#response + } +} diff --git a/cli/src/graphq.ts b/cli/src/graphq.ts new file mode 100644 index 000000000..b9d4780cc --- /dev/null +++ b/cli/src/graphq.ts @@ -0,0 +1,92 @@ +/** + * Minimalist OpenNeuro client with no dependencies + */ + +import { unicodeWidth } from "https://deno.land/std@0.196.0/console/unicode_width.ts" +import { getConfig } from "./commands/login.ts" +import { QueryError } from "./error.ts" + +function request(query: string, variables = {}): Promise { + const config = getConfig() + return fetch(config.url, { + method: "POST", + headers: { + Authorization: `Bearer ${config.token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ query, variables }), + }) +} + +const createDatasetMutation = ` +mutation($affirmedConsent: Boolean, $affirmedDefaced: Boolean) { + createDataset(affirmedConsent: $affirmedConsent, affirmedDefaced: $affirmedDefaced) { + id + } +} +` + +interface CreateDatasetMutationResponse { + data?: { + createDataset: { + id: string + } + } + error?: { + message: string + } +} + +/** + * Create a new dataset + * @param affirmedDefaced Has the upload affirmed this dataset is defaced? + * @param affirmedConsent Has the uploader affirmed they have obtained participant conset to share non-defaced images? 
+ * @returns Dataset ID + */ +export async function createDataset( + affirmedDefaced: boolean, + affirmedConsent: boolean, +): Promise { + const res = await request(createDatasetMutation, { + affirmedDefaced, + affirmedConsent, + }) + const body: CreateDatasetMutationResponse = await res.json() + if (body.error) { + throw new QueryError(JSON.stringify(body.error)) + } + if (body.data) { + return body?.data?.createDataset?.id + } else { + throw new QueryError("Invalid response") + } +} + +const prepareUploadMutation = ` +mutation($datasetId: ID!, $uploadId: ID!) { + prepareUpload(datasetId: $datasetId, uploadId: $uploadId) { + id + } +} +` + +/** + * Setup an upload on the server side + * @param datasetId Accession number `e.g. ds000001` + * @param uploadId UUID for the upload if this is a resume operation + * @returns The UUID for this upload + */ +export async function prepareUpload( + datasetId: string, + uploadId: string | undefined, +) { + const uuid = uploadId ? uploadId : crypto.randomUUID() + await request(prepareUploadMutation, { + datasetId, + uploadId: uuid, + }) + return uuid +} + +export async function finishUpload() { +} diff --git a/cli/src/logger.ts b/cli/src/logger.ts new file mode 100644 index 000000000..fb8244bdc --- /dev/null +++ b/cli/src/logger.ts @@ -0,0 +1,46 @@ +import { getLogger, handlers, LevelName, Logger, setup } from "./deps.ts" + +const loggerName = "@openneuro/cli" + +/** + * Setup a console logger used with the --debug flag + */ +export function setupLogging(level: LevelName) { + setup({ + handlers: { + console: new handlers.ConsoleHandler(level), + }, + + loggers: { + "@openneuro/cli": { + level, + handlers: ["console"], + }, + }, + }) +} + +export function parseStack(stack: string) { + const lines = stack.split("\n") + const caller = lines[2].trim() + const token = caller.split("at ") + return token[1] +} + +const loggerProxyHandler = { + // deno-lint-ignore no-explicit-any + get: function (_: any, prop: keyof Logger) { + const 
logger = getLogger(loggerName) + const stack = new Error().stack + if (stack) { + const callerLocation = parseStack(stack) + logger.debug(`Logger invoked at "${callerLocation}"`) + } + const logFunc = logger[prop] as typeof logger.warning + return logFunc.bind(logger) + }, +} + +const logger = new Proxy(getLogger(loggerName), loggerProxyHandler) + +export { logger } diff --git a/cli/src/options.ts b/cli/src/options.ts new file mode 100644 index 000000000..793ec45cd --- /dev/null +++ b/cli/src/options.ts @@ -0,0 +1,49 @@ +import { + Command, + EnumType, + LevelName, + LogLevelNames, + ValidatorOptions, +} from "./deps.ts" + +import { setupLogging } from "./logger.ts" +import { login } from "./commands/login.ts" +import { upload } from "./commands/upload.ts" +import { gitCredential } from "./commands/git-credential.ts" + +export type OpenNeuroOptions = { + localPath?: string + validatorOptions?: ValidatorOptions + debug: LevelName +} + +const openneuroCommand = new Command() + .name("openneuro") + .description( + "OpenNeuro command line tools for uploading, downloading, or syncing datasets. See https://docs.openneuro.org for detailed guides.", + ) + // TODO - Sync this with the node packages + .version("4.20.4") + .globalType("debugLevel", new EnumType(LogLevelNames)) + .globalEnv("LOG=", "Enable debug output.") + .globalAction(({ log }) => { + setupLogging(log ? 
log : "ERROR") + }) + .command("login", login) + .command("upload", upload) + .command("git-credential", gitCredential) + +/** + * Parse command line options and return a OpenNeuroOptions config + * @param argumentOverride Override the arguments instead of using Deno.args + */ +export async function commandLine( + argumentOverride: string[], +): Promise { + const { args, options } = await openneuroCommand.parse(argumentOverride) + + return { + datasetPath: args[0], + ...options, + } +} diff --git a/docker-compose.yml b/docker-compose.yml index 63247b07e..bff65bd93 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -115,6 +115,21 @@ services: aliases: - datalad-0 - datalad-1 + + # Upload daemon + tusd: + image: docker.io/tusproject/tusd:v2.1.0 + user: root + command: + [ + '--base-path=/tusd/files/', + '--hooks-http=http://server:8111/api/tusd', + '--behind-proxy', + '--hooks-http-forward-headers=authorization', + '--upload-dir=/datasets/tusd' + ] + volumes: + - ${PERSISTENT_DIR}/datalad/tusd:/datasets/tusd:z # nginx + app web: diff --git a/nginx/nginx.dev.conf b/nginx/nginx.dev.conf index c14087c91..0ce346a55 100644 --- a/nginx/nginx.dev.conf +++ b/nginx/nginx.dev.conf @@ -46,6 +46,25 @@ server { proxy_pass http://server:8111; } + location /tusd { + # Forward incoming requests to local tusd instance + proxy_pass http://tusd:8080; + + # Disable request and response buffering + proxy_request_buffering off; + proxy_buffering off; + proxy_http_version 1.1; + + # Add X-Forwarded-* headers + proxy_set_header X-Forwarded-Host $host:9876; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + client_max_body_size 0; + } + + # Sitemap path location /sitemap.xml { client_max_body_size 0; diff --git a/packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts b/packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts new file mode 100644 index 000000000..8e51185dc --- /dev/null +++ 
b/packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts @@ -0,0 +1,15 @@ +import { vi } from "vitest" +import { acceptUpload } from "../tusd" + +vi.mock("../../config.ts") + +describe("tusd handler", () => { + it("generates the expected upload ID", () => { + const accepted = acceptUpload("ds000001", "1234-5678", "path") + expect(accepted.ChangeFileInfo.ID).toMatch( + /^ds[0-9]{6}\/[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}$/, + ) + expect(accepted.ChangeFileInfo.MetaData.datasetId).toEqual("ds000001") + expect(accepted.ChangeFileInfo.MetaData.uploaderId).toEqual("1234-5678") + }) +}) diff --git a/packages/openneuro-server/src/handlers/tusd.ts b/packages/openneuro-server/src/handlers/tusd.ts new file mode 100644 index 000000000..569c74da9 --- /dev/null +++ b/packages/openneuro-server/src/handlers/tusd.ts @@ -0,0 +1,56 @@ +import { checkDatasetWrite } from "../graphql/permissions" + +export function acceptUpload( + datasetId: string, + uploaderId: string, + path: string, +) { + return { + "ChangeFileInfo": { + ID: `${datasetId}:${uploaderId}:${path.replaceAll("/", ":")}`, + MetaData: { + datasetId, + uploaderId, + }, + }, + "RejectUpload": false, + } +} + +export const rejectUpload = { + "HTTPResponse": { + StatusCode: 403, + Body: '{"message":"access denied to dataset"}', + Header: { + "Content-Type": "application/json", + }, + }, + "RejectUpload": true, +} + +export const tusdHandler = (req, res, next) => { + try { + const userId = req.user.id + const userInfo = { + id: userId, + admin: req.user.admin, + } + if (req.body.Type === "pre-create") { + try { + const datasetId = req.body.Event.Upload.MetaData.datasetId + if (checkDatasetWrite(datasetId, userId, userInfo)) { + const path = req.body.Event.Upload.MetaData.relativePath + res.json(acceptUpload(datasetId, userId, path)) + } else { + res.json(rejectUpload) + } + } catch (_err) { + res.status(400) + res.send("`datasetId` MetaData parameter is required to upload") 
+ } + } + } catch (err) { + res.status(401) + next(err) + } +} diff --git a/packages/openneuro-server/src/routes.ts b/packages/openneuro-server/src/routes.ts index 5e9a23132..3f455c4fb 100644 --- a/packages/openneuro-server/src/routes.ts +++ b/packages/openneuro-server/src/routes.ts @@ -14,6 +14,7 @@ import * as auth from "./libs/authentication/states" import * as doi from "./handlers/doi" import { sitemapHandler } from "./handlers/sitemap" import { reviewerHandler } from "./handlers/reviewer" +import { tusdHandler } from "./handlers/tusd" const noCache = (req, res, next) => { res.setHeader("Surrogate-Control", "no-store") @@ -170,6 +171,13 @@ const routes = [ url: "/sitemap", handler: sitemapHandler, }, + // tusd upload hooks + { + method: "post", + url: "/tusd", + middleware: [noCache, jwt.authenticate, auth.authenticated], + handler: tusdHandler, + }, ] // initialize routes ------------------------------- diff --git a/tsconfig.json b/tsconfig.json index 689f0dfaa..af8e1e466 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -9,7 +9,7 @@ "module": "commonjs", "moduleResolution": "node", "resolveJsonModule": true, - "lib": ["es2020", "dom", "webworker"], + "lib": ["es2021", "dom", "webworker"], "skipLibCheck": true, "incremental": true, "composite": true, From a9003ff47e43bbeac76123fe4623bad950c2c3f3 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Wed, 17 Jan 2024 14:54:06 -0800 Subject: [PATCH 02/35] feat(cli): Implement very basic .gitattributes parser --- cli/src/commands/git-credential.ts | 27 +++++--- cli/src/commands/upload.ts | 89 +++++++++++++----------- cli/src/fetchHttpStack.ts | 104 ----------------------------- cli/src/gitattributes.test.ts | 34 ++++++++++ cli/src/gitattributes.ts | 68 +++++++++++++++++++ cli/src/worker/git.ts | 56 ++++++++++++++++ 6 files changed, 226 insertions(+), 152 deletions(-) delete mode 100644 cli/src/fetchHttpStack.ts create mode 100644 cli/src/gitattributes.test.ts create mode 100644 cli/src/gitattributes.ts create mode 
100644 cli/src/worker/git.ts diff --git a/cli/src/commands/git-credential.ts b/cli/src/commands/git-credential.ts index c621bc39f..fc8eb1baf 100644 --- a/cli/src/commands/git-credential.ts +++ b/cli/src/commands/git-credential.ts @@ -1,4 +1,5 @@ import { Command, TextLineStream } from "../deps.ts" +import { getConfig } from "./login.ts" const prepareRepoAccess = ` mutation prepareRepoAccess($datasetId: ID!) { @@ -9,18 +10,26 @@ const prepareRepoAccess = ` } ` -export function getRepoToken(datasetId?: string) { - /* - return client - .mutate({ - mutation: prepareRepoAccess, +export async function getRepoAccess(datasetId?: string) { + const config = getConfig() + const req = await fetch(`${config.url}/crn/graphql`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${config.token}`, // Long lived token + }, + body: JSON.stringify({ + query: prepareRepoAccess, variables: { datasetId, }, - }) - .then(({ data }) => data.prepareRepoAccess.token) - */ - return "token" + }), + }) + const { data } = await req.json() + return { + token: data.prepareRepoAccess.token, // Short lived repo access token + endpoint: data.prepareRepoAccess.endpoint, + } } /** diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index c7fe73bd4..f32b1aea9 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -1,17 +1,9 @@ -import { FetchHttpStack } from "../fetchHttpStack.ts" import { validateCommand } from "./validate.ts" import { ClientConfig, getConfig } from "./login.ts" import { logger } from "../logger.ts" -import { - Confirm, - ProgressBar, - relative, - resolve, - Tus, - Uppy, - walk, -} from "../deps.ts" +import { Confirm, join, ProgressBar, relative, resolve, walk } from "../deps.ts" import type { CommandOptions } from "../deps.ts" +import { getRepoAccess } from "./git-credential.ts" export function readConfig(): ClientConfig { const config = getConfig() @@ -26,11 +18,25 @@ export function readConfig(): 
ClientConfig { return config } +async function getRepoDir(url: URL): Promise { + const LOCAL_STORAGE_KEY = `openneuro_cli_${url.hostname}_` + const repoDir = localStorage.getItem(LOCAL_STORAGE_KEY) + if (repoDir) { + return repoDir + } else { + const tmpDir = await Deno.makeTempDir({ prefix: LOCAL_STORAGE_KEY }) + localStorage.setItem(LOCAL_STORAGE_KEY, tmpDir) + return tmpDir + } +} + export async function uploadAction( options: CommandOptions, dataset_directory: string, ) { const clientConfig = readConfig() + const serverUrl = new URL(clientConfig.url) + const repoDir = await getRepoDir(serverUrl) const dataset_directory_abs = resolve(dataset_directory) logger.info( `upload ${dataset_directory} resolved to ${dataset_directory_abs}`, @@ -38,7 +44,7 @@ export async function uploadAction( // TODO - call the validator here - let datasetId = "ds001001" + let datasetId = "ds001130" if (options.dataset) { datasetId = options.dataset } else { @@ -52,30 +58,38 @@ export async function uploadAction( } } // TODO Create dataset here - datasetId = "ds001001" + datasetId = "ds001130" } - // Setup upload - const uppy = new Uppy({ - id: "@openneuro/cli", - autoProceed: true, - debug: true, - }).use(Tus, { - endpoint: "http://localhost:9876/tusd/files/", - chunkSize: 64000000, // ~64MB - uploadLengthDeferred: true, - headers: { - Authorization: `Bearer ${clientConfig.token}`, - }, - httpStack: new FetchHttpStack(), + + // Create the git worker + const worker = new Worker(new URL("../worker/git.ts", import.meta.url).href, { + type: "module", }) + const repoPath = join(repoDir, datasetId) + const { token, endpoint } = await getRepoAccess(datasetId) + await Deno.mkdir(repoPath, { recursive: true }) + // Configure worker + worker.postMessage({ + "command": "setContext", + "datasetId": datasetId, + "sourcePath": dataset_directory_abs, + "repoPath": repoPath, + "repoUrl": endpoint, + "authorization": `Bearer ${token}`, + }) + + /* const progressBar = new ProgressBar({ title: "Upload", 
total: 100, }) - progressBar.render(0) - uppy.on("progress", (progress) => { - progressBar.render(progress) + progressBar.render(0)*/ + + console.log(join(repoDir, datasetId)) + worker.postMessage({ + "command": "clone", + "url": "https://staging.openneuro.org/git/2/ds001130", }) // Upload all files @@ -85,19 +99,16 @@ export async function uploadAction( includeSymlinks: false, }) ) { - const file = await Deno.open(walkEntry.path) + //const file = await Deno.open(walkEntry.path) const relativePath = relative(dataset_directory_abs, walkEntry.path) - const uppyFile = { - name: walkEntry.name, - data: file.readable.getReader(), - meta: { - datasetId, - relativePath, - }, - } - logger.debug(JSON.stringify({ name: uppyFile.name, meta: uppyFile.meta })) - uppy.addFile(uppyFile) + /*worker.postMessage({ + "command": "add", + "path": walkEntry.path, + "relativePath": relativePath, + })*/ } + + //worker.postMessage({ command: "close" }) } /** diff --git a/cli/src/fetchHttpStack.ts b/cli/src/fetchHttpStack.ts deleted file mode 100644 index f6d5b04a6..000000000 --- a/cli/src/fetchHttpStack.ts +++ /dev/null @@ -1,104 +0,0 @@ -/** - * tus-js-client fetch stack adapter. - * - * Useful for environments such as Deno or web workers without native XMLHttpRequest. 
- */ -export class FetchHttpStack { - createRequest(method: string, url: string) { - return new FetchHttpRequest(method, url) - } - getName(): string { - return "FetchHttpStack" - } -} - -export class FetchHttpRequest { - #headers: Headers - #method: string - #url: string - #abortController: AbortController - #request: Request - #progressHandler?: (bytesSent: number) => void - - constructor(method: string, url: string) { - this.#abortController = new AbortController() - this.#headers = new Headers() - this.#method = method - this.#url = url - // This is a dummy request that is replaced later (for API compatibility only) - this.#request = new Request(url, { method }) - } - - getMethod() { - return this.#method - } - - getURL() { - return this.#url - } - - setHeader(header: string, value: string) { - this.#headers.set(header, value) - } - - getHeader(header: string) { - this.#headers.get(header) - } - - setProgressHandler(progressHandler: (bytesSent: number) => void): void { - this.#progressHandler = progressHandler - } - // Send the HTTP request with the provided request body. The value of the request body depends - // on the platform and what `fileReader` implementation is used. With the default `fileReader`, - // `body` can be - // - in browsers: a TypedArray, a DataView a Blob, or null. - // - in Node.js: a Buffer, a ReadableStream, or null. - async send(body: ReadableStream): Promise { - this.#request = new Request(this.#url, { - body, - signal: this.#abortController.signal, - method: this.#method, - headers: this.#headers, - }) - const response = await fetch(this.#request) - const text = await response.text() - return new FetchHttpResponse(response, text) - } - - abort(): Promise { - return new Promise((resolve) => { - this.#abortController.signal.onabort = () => { - resolve() - } - this.#abortController.abort() - }) - } - - // Return an environment specific object, e.g. the XMLHttpRequest object in browsers. 
- async getUnderlyingObject(): Promise { - return this.#request - } -} - -export class FetchHttpResponse { - #response: Response - #body: string - constructor(response: Response, body: string) { - this.#response = response - this.#body = body - } - getStatus(): number { - return this.#response.status - } - getHeader(header: string): string | null { - return this.#response.headers.get(header) - } - getBody(): string { - return this.#body - } - - // Return an environment specific object, e.g. the XMLHttpRequest object in browsers. - getUnderlyingObject(): Response { - return this.#response - } -} diff --git a/cli/src/gitattributes.test.ts b/cli/src/gitattributes.test.ts new file mode 100644 index 000000000..b3cdff32a --- /dev/null +++ b/cli/src/gitattributes.test.ts @@ -0,0 +1,34 @@ +import { assertEquals } from "./deps.ts" +import { parseGitAttributes } from "./gitattributes.ts" + +const testAttributes = `* annex.backend=SHA256E +**/.git* annex.largefiles=nothing +*.bval annex.largefiles=nothing +*.bvec annex.largefiles=nothing +*.json annex.largefiles=largerthan=1mb +phenotype/*.tsv annex.largefiles=anything +*.tsv annex.largefiles=largerthan=1mb +dataset_description.json annex.largefiles=nothing +.bidsignore annex.largefiles=nothing +CHANGES annex.largefiles=nothing +README* annex.largefiles=nothing +LICENSE annex.largefiles=nothing annex.backend=MD5E +` + +Deno.test("parses a git-annex .gitattributes file", async () => { + const parsed = parseGitAttributes(testAttributes) + assertEquals(parsed, { + "*": { backend: "SHA256E" }, + "**/.git*": { largefiles: Infinity }, + "*.bval": { largefiles: Infinity }, + "*.bvec": { largefiles: Infinity }, + "*.json": { largefiles: 1024 * 1024 }, + "phenotype/*.tsv": { largefiles: 0 }, + "*.tsv": { largefiles: 1024 * 1024 }, + "dataset_description.json": { largefiles: Infinity }, + ".bidsignore": { largefiles: Infinity }, + "CHANGES": { largefiles: Infinity }, + "README*": { largefiles: Infinity }, + "LICENSE": { largefiles: 
Infinity, backend: "MD5E" }, + }) +}) diff --git a/cli/src/gitattributes.ts b/cli/src/gitattributes.ts new file mode 100644 index 000000000..4adf3f2f3 --- /dev/null +++ b/cli/src/gitattributes.ts @@ -0,0 +1,68 @@ +/** + * Git annex supports many backends, we support a limited subset used by OpenNeuro (for now) + * https://git-annex.branchable.com/backends/ + */ +enum SupportedAnnexBackends { + MD5E = "MD5E", + SHA256E = "SHA256E", +} + +/** + * Annex attributes for one path + */ +interface GitAnnexAttributeOptions { + largefiles?: number + backend?: SupportedAnnexBackends +} + +/** + * Minimal parsing of .gitattributes for uploader usage + */ +type GitAnnexAttributes = Record + +/** + * Parse any relevant annex options from .gitattributes + * @param gitattributes A .gitattributes file in string format + */ +export function parseGitAttributes(gitattributes: string): GitAnnexAttributes { + const attributesObject: GitAnnexAttributes = {} + for (const line of gitattributes.split("\n")) { + if (line.length < 3) { + continue + } + const [prefix, ...rest] = line.split(" ") + attributesObject[prefix] = {} + for (const attr of rest) { + const eqIndex = attr.indexOf("=") + const key = attr.substring(0, eqIndex) + const value = attr.substring(eqIndex + 1) + if (key === "annex.largefiles") { + if (value === "nothing") { + attributesObject[prefix].largefiles = Infinity + } else if (value === "anything") { + attributesObject[prefix].largefiles = 0 + } else if (value.startsWith("largerthan=")) { + const size = value.split("largerthan=")[1].toLowerCase() + if (size.endsWith("kb")) { + attributesObject[prefix].largefiles = Number(size.slice(0, -2)) * + 1024 + } else if (size.endsWith("mb")) { + attributesObject[prefix].largefiles = Number(size.slice(0, -2)) * + 1024 * 1024 + } else if (size.endsWith("gb")) { + attributesObject[prefix].largefiles = Number(size.slice(0, -2)) * + 1024 * 1024 * 1024 + } else if (size.endsWith("tb")) { + attributesObject[prefix].largefiles = 
Number(size.slice(0, -2)) * + 1024 * 1024 * 1024 * 1024 + } else { + attributesObject[prefix].largefiles = Number(size) + } + } + } else if (key === "annex.backend") { + attributesObject[prefix].backend = value as SupportedAnnexBackends + } + } + } + return attributesObject +} diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts new file mode 100644 index 000000000..8e9efa590 --- /dev/null +++ b/cli/src/worker/git.ts @@ -0,0 +1,56 @@ +// Might be useful if this is shared by the browser uploader at some point +import "https://deno.land/x/indexeddb@1.3.5/polyfill.ts" +import LightningFS from "https://esm.sh/@isomorphic-git/lightning-fs@4.6.0" +import git from "https://esm.sh/isomorphic-git@1.25.3" +import http from "https://esm.sh/isomorphic-git@1.25.3/http/node" +//import fs from "node:fs" + +const context = { + // Current working dataset + datasetId: undefined, + // The path being uploaded from to OpenNeuro + sourcePath: undefined, + // The path of our local clone (possibly in virtual fs) + repoPath: undefined, + // URL for the remote git repo + repoEndpoint: undefined, + // OpenNeuro git access short lived API key + authorization: undefined, + fs: undefined, + // setContext has been called at least once + initialized: false, +} + +self.addEventListener("unhandledrejection", (e) => { + console.log(e.reason) + console.log(e.reason.stack) + e.preventDefault() +}) + +self.onmessage = async (event) => { + if (event.data.command === "setContext") { + context.datasetId = event.data.datasetId + context.sourcePath = event.data.sourcePath + context.repoPath = event.data.repoPath + context.repoEndpoint = event.data.repoEndpoint + context.authorization = event.data.authorization + context.fs = new LightningFS(context.datasetId) + context.initialized = true + } else if (event.data.command === "clone") { + await git.clone({ + fs: context.fs, + http, + dir: context.repoPath, + url: event.data.url, + singleBranch: true, + depth: 1, + headers: { + Authorization: 
context.authorization, + }, + }) + } else if (event.data.command === "add") { + console.log(event.data.path) + } else if (event.data.command === "close") { + globalThis.close() + } +} From ad52a00a08c0391cad62f9293f01444a4b9a2708 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Wed, 17 Jan 2024 14:54:27 -0800 Subject: [PATCH 03/35] feat(cli): Support adding git objects to upload repo --- cli/src/commands/git-credential.test.ts | 5 +- cli/src/commands/git-credential.ts | 4 +- cli/src/commands/upload.ts | 20 ++-- cli/src/deps.ts | 1 + cli/src/graphq.ts | 1 - cli/src/logger.ts | 3 + cli/src/worker/git.ts | 151 ++++++++++++++++++------ 7 files changed, 133 insertions(+), 52 deletions(-) diff --git a/cli/src/commands/git-credential.test.ts b/cli/src/commands/git-credential.test.ts index 7e605370f..5f2117b16 100644 --- a/cli/src/commands/git-credential.test.ts +++ b/cli/src/commands/git-credential.test.ts @@ -12,6 +12,9 @@ Deno.test("git-credential parses stdin correctly", async () => { controller.close() }, }) - const output = await gitCredentialAction(stdin, () => "token") + const output = await gitCredentialAction( + stdin, + async () => ({ token: "token", endpoint: 2 }), + ) assertEquals(output, "username=@openneuro/cli\npassword=token\n") }) diff --git a/cli/src/commands/git-credential.ts b/cli/src/commands/git-credential.ts index fc8eb1baf..d9410361d 100644 --- a/cli/src/commands/git-credential.ts +++ b/cli/src/commands/git-credential.ts @@ -37,7 +37,7 @@ export async function getRepoAccess(datasetId?: string) { */ export async function gitCredentialAction( stdinReadable: ReadableStream = Deno.stdin.readable, - tokenGetter = getRepoToken, + tokenGetter = getRepoAccess, ) { let pipeOutput = "" const credential: Record = {} @@ -51,7 +51,7 @@ export async function gitCredentialAction( } if ("path" in credential && credential.path) { const datasetId = credential.path.split("/").pop() - const token = await tokenGetter(datasetId) + const { token } = await 
tokenGetter(datasetId) const output: Record = { username: "@openneuro/cli", password: token, diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index f32b1aea9..cfe20ecb0 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -11,9 +11,9 @@ export function readConfig(): ClientConfig { `configured with URL "${config.url}" and token "${ config.token.slice( 0, - 3, + 4, ) - }...${config.token.slice(-3)}`, + }...${config.token.slice(-4)}"`, ) return config } @@ -75,8 +75,9 @@ export async function uploadAction( "datasetId": datasetId, "sourcePath": dataset_directory_abs, "repoPath": repoPath, - "repoUrl": endpoint, + "repoEndpoint": `${clientConfig.url}/git/${endpoint}/${datasetId}`, "authorization": `Bearer ${token}`, + "logLevel": logger.levelName, }) /* @@ -89,26 +90,25 @@ export async function uploadAction( console.log(join(repoDir, datasetId)) worker.postMessage({ "command": "clone", - "url": "https://staging.openneuro.org/git/2/ds001130", }) // Upload all files for await ( - const walkEntry of walk(dataset_directory, { + const walkEntry of walk(dataset_directory_abs, { includeDirs: false, includeSymlinks: false, }) ) { - //const file = await Deno.open(walkEntry.path) const relativePath = relative(dataset_directory_abs, walkEntry.path) - /*worker.postMessage({ + worker.postMessage({ "command": "add", "path": walkEntry.path, "relativePath": relativePath, - })*/ + "annexed": false, + }) } - - //worker.postMessage({ command: "close" }) + // Close after all tasks are queued + worker.postMessage({ command: "close" }) } /** diff --git a/cli/src/deps.ts b/cli/src/deps.ts index 9942f3063..1c4752cb1 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -38,6 +38,7 @@ export { walk } from "https://deno.land/std@0.212.0/fs/walk.ts" export { resolve } from "https://deno.land/std@0.212.0/path/resolve.ts" export { relative } from "https://deno.land/std@0.212.0/path/relative.ts" export { join } from 
"https://deno.land/std@0.212.0/path/join.ts" +export { ensureLink } from "https://deno.land/std@0.212.0/fs/ensure_link.ts" // Test suites export { assert, diff --git a/cli/src/graphq.ts b/cli/src/graphq.ts index b9d4780cc..61e21c9da 100644 --- a/cli/src/graphq.ts +++ b/cli/src/graphq.ts @@ -2,7 +2,6 @@ * Minimalist OpenNeuro client with no dependencies */ -import { unicodeWidth } from "https://deno.land/std@0.196.0/console/unicode_width.ts" import { getConfig } from "./commands/login.ts" import { QueryError } from "./error.ts" diff --git a/cli/src/logger.ts b/cli/src/logger.ts index fb8244bdc..b81eb76a4 100644 --- a/cli/src/logger.ts +++ b/cli/src/logger.ts @@ -31,6 +31,9 @@ const loggerProxyHandler = { // deno-lint-ignore no-explicit-any get: function (_: any, prop: keyof Logger) { const logger = getLogger(loggerName) + if (prop === "levelName" || prop === "level") { + return logger[prop] + } const stack = new Error().stack if (stack) { const callerLocation = parseStack(stack) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 8e9efa590..23321d53f 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -1,56 +1,131 @@ // Might be useful if this is shared by the browser uploader at some point import "https://deno.land/x/indexeddb@1.3.5/polyfill.ts" -import LightningFS from "https://esm.sh/@isomorphic-git/lightning-fs@4.6.0" -import git from "https://esm.sh/isomorphic-git@1.25.3" -import http from "https://esm.sh/isomorphic-git@1.25.3/http/node" -//import fs from "node:fs" +import git from "npm:isomorphic-git@1.25.3" +import http from "npm:isomorphic-git@1.25.3/http/node/index.js" +import fs from "node:fs" +import { ensureLink, join } from "../deps.ts" +import { logger, setupLogging } from "../logger.ts" -const context = { +// This error originates in isomorphic-git due to a bug in Deno 1.39.4 +// https://github.com/denoland/deno/issues/21795 +self.addEventListener("unhandledrejection", (e) => { + if (String(e?.reason)?.endsWith("readfile 
''")) { + e.preventDefault() + } +}) + +interface GitContext { // Current working dataset - datasetId: undefined, + datasetId: string // The path being uploaded from to OpenNeuro - sourcePath: undefined, + sourcePath: string // The path of our local clone (possibly in virtual fs) - repoPath: undefined, + repoPath: string // URL for the remote git repo - repoEndpoint: undefined, + repoEndpoint: string // OpenNeuro git access short lived API key - authorization: undefined, - fs: undefined, - // setContext has been called at least once - initialized: false, + authorization: string + // .gitattributes + attributes?: GitAnnexAttributes } -self.addEventListener("unhandledrejection", (e) => { - console.log(e.reason) - console.log(e.reason.stack) - e.preventDefault() -}) +let context: GitContext +// Shut down if this is set +let done = false -self.onmessage = async (event) => { - if (event.data.command === "setContext") { - context.datasetId = event.data.datasetId - context.sourcePath = event.data.sourcePath - context.repoPath = event.data.repoPath - context.repoEndpoint = event.data.repoEndpoint - context.authorization = event.data.authorization - context.fs = new LightningFS(context.datasetId) - context.initialized = true - } else if (event.data.command === "clone") { +function shutdownIfDone() { + if (done) { + globalThis.close() + } +} + +function gitOptions(dir) { + return { + fs, + http, + dir, + url: context.repoEndpoint, + headers: { + Authorization: context.authorization, + }, + } +} + +/** + * Clone or fetch the draft + */ +async function update() { + const options = gitOptions(context.repoPath) + try { + await fs.promises.access(join(dir, ".git")) + logger.info( + `Fetching ${context.datasetId} draft from "${context.repoEndpoint}"`, + ) + await git.fetch(options) + } catch (_err) { + logger.info( + `Cloning ${context.datasetId} draft from "${context.repoEndpoint}"`, + ) await git.clone({ - fs: context.fs, - http, - dir: context.repoPath, - url: event.data.url, + 
...options, singleBranch: true, depth: 1, - headers: { - Authorization: context.authorization, - }, }) - } else if (event.data.command === "add") { - console.log(event.data.path) - } else if (event.data.command === "close") { + } + try { + const oid = await git.resolveRef({ ...options, ref: "main" }) || + await git.resolveRef({ ...options, ref: "master" }) + context.attributes = new TextDecoder().decode( + (await git.readBlob({ ...options, oid, filepath: ".gitattributes" })) + .blob, + ) + } catch (_err) { + logger.error( + "Dataset repository is missing .gitattributes and may be improperly initialized.", + ) globalThis.close() } + logger.info(`${context.datasetId} draft fetched!`) +} + +/** + * git-annex add equivalent + */ +async function add(event) { + if (event.data.annexed) { + // Compute hash and add link + } else { + // Simple add case + const options = { + ...gitOptions(context.repoPath), + filepath: event.data.relativePath, + } + return + // Hard link to the target location + await ensureLink( + event.data.path, + join(context.repoPath, event.data.relativePath), + ) + await git.add(options) + logger.info(`Added ${event.data.relativePath}`) + } +} + +self.onmessage = async (event) => { + if (event.data.command === "setContext") { + context = { + datasetId: event.data.datasetId, + sourcePath: event.data.sourcePath, + repoPath: event.data.repoPath, + repoEndpoint: event.data.repoEndpoint, + authorization: event.data.authorization, + } + setupLogging(event.data.logLevel) + } else if (event.data.command === "clone") { + await update() + } else if (event.data.command === "add") { + await add(event) + } else if (event.data.command === "done") { + done = true + } } From 25823cae11ab706ce2e4ce1c5a90e03c2bc5e6bf Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Wed, 17 Jan 2024 16:00:47 -0800 Subject: [PATCH 04/35] feat(cli): Add support for matching .gitattributes files --- cli/src/commands/upload.ts | 1 - cli/src/deps.ts | 3 +++ cli/src/gitattributes.test.ts | 
32 ++++++++++++++++++++++------ cli/src/gitattributes.ts | 40 +++++++++++++++++++++++++++++++---- cli/src/worker/git.ts | 13 +++++++----- 5 files changed, 73 insertions(+), 16 deletions(-) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index cfe20ecb0..24c471ccf 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -104,7 +104,6 @@ export async function uploadAction( "command": "add", "path": walkEntry.path, "relativePath": relativePath, - "annexed": false, }) } // Close after all tasks are queued diff --git a/cli/src/deps.ts b/cli/src/deps.ts index 1c4752cb1..573286cfb 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -43,6 +43,7 @@ export { ensureLink } from "https://deno.land/std@0.212.0/fs/ensure_link.ts" export { assert, assertEquals, + assertObjectMatch, } from "https://deno.land/std@0.212.0/assert/mod.ts" export { assertSpyCalls, @@ -51,3 +52,5 @@ export { } from "https://deno.land/std@0.212.0/testing/mock.ts" // Progress bars export { default as ProgressBar } from "https://deno.land/x/progress@v1.3.9/mod.ts" +// Ignore library +export { default as ignore } from "npm:ignore@5.3.0" diff --git a/cli/src/gitattributes.test.ts b/cli/src/gitattributes.test.ts index b3cdff32a..087a44159 100644 --- a/cli/src/gitattributes.test.ts +++ b/cli/src/gitattributes.test.ts @@ -1,5 +1,5 @@ -import { assertEquals } from "./deps.ts" -import { parseGitAttributes } from "./gitattributes.ts" +import { assertEquals, assertObjectMatch } from "./deps.ts" +import { matchGitAttributes, parseGitAttributes } from "./gitattributes.ts" const testAttributes = `* annex.backend=SHA256E **/.git* annex.largefiles=nothing @@ -15,20 +15,40 @@ README* annex.largefiles=nothing LICENSE annex.largefiles=nothing annex.backend=MD5E ` -Deno.test("parses a git-annex .gitattributes file", async () => { +Deno.test("parseGitAttributes() parses a git-annex .gitattributes file", async () => { const parsed = parseGitAttributes(testAttributes) - 
assertEquals(parsed, { + assertObjectMatch(parsed, { "*": { backend: "SHA256E" }, - "**/.git*": { largefiles: Infinity }, + "**/.git*": { + largefiles: Infinity, + }, "*.bval": { largefiles: Infinity }, "*.bvec": { largefiles: Infinity }, "*.json": { largefiles: 1024 * 1024 }, "phenotype/*.tsv": { largefiles: 0 }, "*.tsv": { largefiles: 1024 * 1024 }, - "dataset_description.json": { largefiles: Infinity }, + "dataset_description.json": { + largefiles: Infinity, + }, ".bidsignore": { largefiles: Infinity }, "CHANGES": { largefiles: Infinity }, "README*": { largefiles: Infinity }, "LICENSE": { largefiles: Infinity, backend: "MD5E" }, }) }) + +Deno.test("matchGitAttributes() matches any relevant rules for a path", async () => { + const attr = parseGitAttributes(testAttributes) + assertEquals(matchGitAttributes(attr, "derivatives/test_file.json"), { + backend: "SHA256E", + largefiles: 1024 * 1024, + }) + assertEquals(matchGitAttributes(attr, "dataset_description.json"), { + backend: "SHA256E", + largefiles: Infinity, + }) + assertEquals(matchGitAttributes(attr, "LICENSE"), { + backend: "MD5E", + largefiles: Infinity, + }) +}) diff --git a/cli/src/gitattributes.ts b/cli/src/gitattributes.ts index 4adf3f2f3..b00cfbbf7 100644 --- a/cli/src/gitattributes.ts +++ b/cli/src/gitattributes.ts @@ -1,8 +1,11 @@ +import { string } from "https://deno.land/x/cliffy@v1.0.0-rc.3/flags/types/string.ts" +import { ignore } from "./deps.ts" + /** * Git annex supports many backends, we support a limited subset used by OpenNeuro (for now) * https://git-annex.branchable.com/backends/ */ -enum SupportedAnnexBackends { +export enum SupportedAnnexBackends { MD5E = "MD5E", SHA256E = "SHA256E", } @@ -10,15 +13,16 @@ enum SupportedAnnexBackends { /** * Annex attributes for one path */ -interface GitAnnexAttributeOptions { +export interface GitAnnexAttributeOptions { largefiles?: number backend?: SupportedAnnexBackends + match: ignore.Ignore } /** * Minimal parsing of .gitattributes for uploader 
usage */ -type GitAnnexAttributes = Record +export type GitAnnexAttributes = Record /** * Parse any relevant annex options from .gitattributes @@ -31,7 +35,9 @@ export function parseGitAttributes(gitattributes: string): GitAnnexAttributes { continue } const [prefix, ...rest] = line.split(" ") - attributesObject[prefix] = {} + attributesObject[prefix] = { + match: ignore.default().add(prefix), + } for (const attr of rest) { const eqIndex = attr.indexOf("=") const key = attr.substring(0, eqIndex) @@ -66,3 +72,29 @@ export function parseGitAttributes(gitattributes: string): GitAnnexAttributes { } return attributesObject } + +interface MatchingAnnexAttributes { + backend?: SupportedAnnexBackends + largefiles?: number +} + +/** + * Return any matching values merged for a given path + */ +export function matchGitAttributes( + attributes: GitAnnexAttributes, + path: string, +) { + const matching: MatchingAnnexAttributes = {} + for (const [prefix, attr] of Object.entries(attributes)) { + if (attr.match.test(path).ignored == true) { + if ("backend" in attr) { + matching.backend = attr.backend + } + if ("largefiles" in attr) { + matching.largefiles = attr.largefiles + } + } + } + return matching +} diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 23321d53f..a2cf3abbd 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -3,6 +3,7 @@ import "https://deno.land/x/indexeddb@1.3.5/polyfill.ts" import git from "npm:isomorphic-git@1.25.3" import http from "npm:isomorphic-git@1.25.3/http/node/index.js" import fs from "node:fs" +import { GitAnnexAttributes, parseGitAttributes } from "../gitattributes.ts" import { ensureLink, join } from "../deps.ts" import { logger, setupLogging } from "../logger.ts" @@ -75,10 +76,13 @@ async function update() { try { const oid = await git.resolveRef({ ...options, ref: "main" }) || await git.resolveRef({ ...options, ref: "master" }) - context.attributes = new TextDecoder().decode( - (await git.readBlob({ ...options, oid, 
filepath: ".gitattributes" })) - .blob, - ) + const rawAttributes = await git.readBlob({ + ...options, + oid, + filepath: ".gitattributes", + }) + const stringAttributes = new TextDecoder().decode(rawAttributes.blob) + context.attributes = parseGitAttributes(stringAttributes) } catch (_err) { logger.error( "Dataset repository is missing .gitattributes and may be improperly initialized.", @@ -100,7 +104,6 @@ async function add(event) { ...gitOptions(context.repoPath), filepath: event.data.relativePath, } - return // Hard link to the target location await ensureLink( event.data.path, From a79207465774c97edc5b3df9c601d02fc9ccd31d Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 22 Jan 2024 10:17:08 -0800 Subject: [PATCH 05/35] feat(cli): Add support for determining annexed files --- cli/src/worker/git.ts | 83 +++++++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 22 deletions(-) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index a2cf3abbd..00bc269cf 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -3,7 +3,11 @@ import "https://deno.land/x/indexeddb@1.3.5/polyfill.ts" import git from "npm:isomorphic-git@1.25.3" import http from "npm:isomorphic-git@1.25.3/http/node/index.js" import fs from "node:fs" -import { GitAnnexAttributes, parseGitAttributes } from "../gitattributes.ts" +import { + GitAnnexAttributes, + matchGitAttributes, + parseGitAttributes, +} from "../gitattributes.ts" import { ensureLink, join } from "../deps.ts" import { logger, setupLogging } from "../logger.ts" @@ -26,11 +30,10 @@ interface GitContext { repoEndpoint: string // OpenNeuro git access short lived API key authorization: string - // .gitattributes - attributes?: GitAnnexAttributes } let context: GitContext +let attributesCache: GitAnnexAttributes // Shut down if this is set let done = false @@ -58,7 +61,7 @@ function gitOptions(dir) { async function update() { const options = gitOptions(context.repoPath) try { - await 
fs.promises.access(join(dir, ".git")) + await fs.promises.access(join(context.repoPath, ".git")) logger.info( `Fetching ${context.datasetId} draft from "${context.repoEndpoint}"`, ) @@ -73,30 +76,63 @@ async function update() { depth: 1, }) } - try { - const oid = await git.resolveRef({ ...options, ref: "main" }) || - await git.resolveRef({ ...options, ref: "master" }) - const rawAttributes = await git.readBlob({ - ...options, - oid, - filepath: ".gitattributes", - }) - const stringAttributes = new TextDecoder().decode(rawAttributes.blob) - context.attributes = parseGitAttributes(stringAttributes) - } catch (_err) { - logger.error( - "Dataset repository is missing .gitattributes and may be improperly initialized.", - ) - globalThis.close() - } logger.info(`${context.datasetId} draft fetched!`) } +/** + * Load or return a cache copy of .gitattributes + */ +async function getGitAttributes(): Promise { + if (!attributesCache) { + const options = gitOptions(context.repoPath) + try { + const oid = await git.resolveRef({ ...options, ref: "main" }) || + await git.resolveRef({ ...options, ref: "master" }) + const rawAttributes = await git.readBlob({ + ...options, + oid, + filepath: ".gitattributes", + }) + const stringAttributes = new TextDecoder().decode(rawAttributes.blob) + attributesCache = parseGitAttributes(stringAttributes) + } catch (_err) { + logger.error( + "Dataset repository is missing .gitattributes and may be improperly initialized.", + ) + globalThis.close() + } + } + return attributesCache +} + +/** + * Decide if this incoming file is annexed or not + */ +async function shouldBeAnnexed(absolutePath: string, relativePath: string) { + const gitAttributes = await getGitAttributes() + const attributes = matchGitAttributes(gitAttributes, relativePath) + if (attributes.largefiles) { + const { size } = await Deno.stat(absolutePath) + if (size > attributes.largefiles) { + return true + } else { + return false + } + } + // No rules matched, default to annex + return 
true +} + /** * git-annex add equivalent */ async function add(event) { - if (event.data.annexed) { + const annexed = await shouldBeAnnexed( + event.data.path, + event.data.relativePath, + ) + console.log(event.data.path, annexed) + if (annexed) { // Compute hash and add link } else { // Simple add case @@ -104,10 +140,13 @@ async function add(event) { ...gitOptions(context.repoPath), filepath: event.data.relativePath, } + const targetPath = join(context.repoPath, event.data.relativePath) + // Remove the target + await Deno.remove(targetPath) // Hard link to the target location await ensureLink( event.data.path, - join(context.repoPath, event.data.relativePath), + targetPath, ) await git.add(options) logger.info(`Added ${event.data.relativePath}`) From 680c57451b2971b98951b6a61f1f25dbef20fa8e Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Thu, 25 Jan 2024 18:02:39 -0800 Subject: [PATCH 06/35] feat(cli): Implement git-annex hashing --- cli/src/commands/upload.ts | 4 ++ cli/src/deps.ts | 2 +- cli/src/gitattributes.ts | 12 ++--- cli/src/worker/git.ts | 93 +++++++++++++++++++++++--------------- cli/src/worker/queue.ts | 23 ++++++++++ 5 files changed, 89 insertions(+), 45 deletions(-) create mode 100644 cli/src/worker/queue.ts diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 24c471ccf..4a7b2c9de 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -106,6 +106,10 @@ export async function uploadAction( "relativePath": relativePath, }) } + + // Generate a commit + worker.postMessage({ command: "commit" }) + // Close after all tasks are queued worker.postMessage({ command: "close" }) } diff --git a/cli/src/deps.ts b/cli/src/deps.ts index 573286cfb..7e76063dc 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -38,7 +38,7 @@ export { walk } from "https://deno.land/std@0.212.0/fs/walk.ts" export { resolve } from "https://deno.land/std@0.212.0/path/resolve.ts" export { relative } from 
"https://deno.land/std@0.212.0/path/relative.ts" export { join } from "https://deno.land/std@0.212.0/path/join.ts" -export { ensureLink } from "https://deno.land/std@0.212.0/fs/ensure_link.ts" +export { extname } from "https://deno.land/std@0.212.0/path/extname.ts" // Test suites export { assert, diff --git a/cli/src/gitattributes.ts b/cli/src/gitattributes.ts index b00cfbbf7..f67ee02d0 100644 --- a/cli/src/gitattributes.ts +++ b/cli/src/gitattributes.ts @@ -1,21 +1,17 @@ -import { string } from "https://deno.land/x/cliffy@v1.0.0-rc.3/flags/types/string.ts" import { ignore } from "./deps.ts" /** * Git annex supports many backends, we support a limited subset used by OpenNeuro (for now) * https://git-annex.branchable.com/backends/ */ -export enum SupportedAnnexBackends { - MD5E = "MD5E", - SHA256E = "SHA256E", -} +export type GitAnnexBackend = "GIT" | "SHA256" | "SHA256E" | "MD5" | "MD5E" /** * Annex attributes for one path */ export interface GitAnnexAttributeOptions { largefiles?: number - backend?: SupportedAnnexBackends + backend?: GitAnnexBackend match: ignore.Ignore } @@ -66,7 +62,7 @@ export function parseGitAttributes(gitattributes: string): GitAnnexAttributes { } } } else if (key === "annex.backend") { - attributesObject[prefix].backend = value as SupportedAnnexBackends + attributesObject[prefix].backend = value as GitAnnexBackend } } } @@ -74,7 +70,7 @@ export function parseGitAttributes(gitattributes: string): GitAnnexAttributes { } interface MatchingAnnexAttributes { - backend?: SupportedAnnexBackends + backend?: GitAnnexBackend largefiles?: number } diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 00bc269cf..808b676c2 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -5,19 +5,18 @@ import http from "npm:isomorphic-git@1.25.3/http/node/index.js" import fs from "node:fs" import { GitAnnexAttributes, + GitAnnexBackend, matchGitAttributes, parseGitAttributes, } from "../gitattributes.ts" -import { ensureLink, join } from 
"../deps.ts" +import { extname, join } from "../deps.ts" import { logger, setupLogging } from "../logger.ts" - -// This error originates in isomorphic-git due to a bug in Deno 1.39.4 -// https://github.com/denoland/deno/issues/21795 -self.addEventListener("unhandledrejection", (e) => { - if (String(e?.reason)?.endsWith("readfile ''")) { - e.preventDefault() - } -}) +import { PromiseQueue } from "./queue.ts" +/** + * Why are we using hash wasm over web crypto? + * Web crypto cannot do streaming hashes of the common git-annex functions yet. + */ +import { createMD5, createSHA256 } from "npm:hash-wasm" interface GitContext { // Current working dataset @@ -34,13 +33,9 @@ interface GitContext { let context: GitContext let attributesCache: GitAnnexAttributes -// Shut down if this is set -let done = false -function shutdownIfDone() { - if (done) { - globalThis.close() - } +async function done() { + await globalThis.close() } function gitOptions(dir) { @@ -108,52 +103,76 @@ async function getGitAttributes(): Promise { /** * Decide if this incoming file is annexed or not */ -async function shouldBeAnnexed(absolutePath: string, relativePath: string) { +async function shouldBeAnnexed( + relativePath: string, + size: number, +): Promise { const gitAttributes = await getGitAttributes() const attributes = matchGitAttributes(gitAttributes, relativePath) if (attributes.largefiles) { - const { size } = await Deno.stat(absolutePath) - if (size > attributes.largefiles) { - return true + if (size > attributes.largefiles && attributes.backend) { + return attributes.backend } else { - return false + return "GIT" } } // No rules matched, default to annex - return true + return "SHA256E" } /** * git-annex add equivalent */ async function add(event) { + const { size } = await fs.promises.stat(event.data.path) const annexed = await shouldBeAnnexed( - event.data.path, event.data.relativePath, + size, ) console.log(event.data.path, annexed) - if (annexed) { - // Compute hash and add link - } 
else { + if (annexed === "GIT") { // Simple add case const options = { ...gitOptions(context.repoPath), filepath: event.data.relativePath, } const targetPath = join(context.repoPath, event.data.relativePath) - // Remove the target - await Deno.remove(targetPath) - // Hard link to the target location - await ensureLink( - event.data.path, - targetPath, - ) + // Copy non-annexed files for git index creation + await fs.promises.copyFile(event.data.path, targetPath) await git.add(options) logger.info(`Added ${event.data.relativePath}`) + } else { + // Compute hash and add link + const computeHash = annexed.startsWith("MD5") + ? await createMD5() + : await createSHA256() + // E in the backend means include the file extension + const extension = annexed.endsWith("E") + ? extname(event.data.relativePath) + : "" + computeHash.init() + const stream = fs.createReadStream(event.data.path, { + highWaterMark: 1024 * 1024 * 10, + }) + for await (const data of stream) { + computeHash.update(data) + } + const digest = computeHash.digest("hex") + const annexKey = `${annexed}-${size}--${digest}${extension}` + console.log(annexKey) } } -self.onmessage = async (event) => { +/** + * `git commit` equivalent + */ +async function commit() { + console.log("Commit goes here") +} + +const workQueue = new PromiseQueue() + +self.onmessage = (event) => { if (event.data.command === "setContext") { context = { datasetId: event.data.datasetId, @@ -164,10 +183,12 @@ self.onmessage = async (event) => { } setupLogging(event.data.logLevel) } else if (event.data.command === "clone") { - await update() + workQueue.enqueue(update) } else if (event.data.command === "add") { - await add(event) + workQueue.enqueue(add, event) + } else if (event.data.command === "commit") { + workQueue.enqueue(commit) } else if (event.data.command === "done") { - done = true + workQueue.enqueue(done) } } diff --git a/cli/src/worker/queue.ts b/cli/src/worker/queue.ts new file mode 100644 index 000000000..aa07b17cd --- 
/dev/null +++ b/cli/src/worker/queue.ts @@ -0,0 +1,23 @@ +export class PromiseQueue { + private queue: ((...args: any[]) => Promise)[] = [] + private running = false + + enqueue(promiseFn: (...args: any[]) => Promise, ...args: any[]) { + this.queue.push(async () => promiseFn(...args)) + this.processQueue() + } + + private async processQueue() { + if (this.running) return + + this.running = true + try { + while (this.queue.length > 0) { + const promiseFn = this.queue.shift() + if (promiseFn) await promiseFn() + } + } finally { + this.running = false + } + } +} From c4e92b5fa62f32fdd7c6e02c47709981ef3d845a Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Sat, 27 Jan 2024 12:09:02 -0800 Subject: [PATCH 07/35] fix(cli): Improve typing of git worker interface --- cli/src/commands/upload.ts | 2 +- cli/src/worker/git.ts | 43 ++++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 4a7b2c9de..14d439c9d 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -71,7 +71,7 @@ export async function uploadAction( await Deno.mkdir(repoPath, { recursive: true }) // Configure worker worker.postMessage({ - "command": "setContext", + "command": "setup", "datasetId": datasetId, "sourcePath": dataset_directory_abs, "repoPath": repoPath, diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 808b676c2..bbff02f95 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -9,7 +9,7 @@ import { matchGitAttributes, parseGitAttributes, } from "../gitattributes.ts" -import { extname, join } from "../deps.ts" +import { extname, join, LevelName } from "../deps.ts" import { logger, setupLogging } from "../logger.ts" import { PromiseQueue } from "./queue.ts" /** @@ -31,6 +31,41 @@ interface GitContext { authorization: string } +/** + * Events with no arguments + */ +interface GitWorkerEventGeneric { + data: { + command: "clone" | "commit" | "done" + } 
+} + +interface GitWorkerEventSetupData extends GitContext { + command: "setup" + logLevel: LevelName +} + +/** Setup event to set dataset and repo state for commands until next call */ +interface GitWorkerEventSetup { + data: GitWorkerEventSetupData +} + +/** Add event to add one file */ +interface GitWorkerEventAdd { + data: { + command: "add" + // Absolute path on the local system + path: string + // Dataset relative path + relativePath: string + } +} + +type GitWorkerEvent = + | GitWorkerEventSetup + | GitWorkerEventGeneric + | GitWorkerEventAdd + let context: GitContext let attributesCache: GitAnnexAttributes @@ -123,7 +158,7 @@ async function shouldBeAnnexed( /** * git-annex add equivalent */ -async function add(event) { +async function add(event: GitWorkerEventAdd) { const { size } = await fs.promises.stat(event.data.path) const annexed = await shouldBeAnnexed( event.data.relativePath, @@ -172,8 +207,8 @@ async function commit() { const workQueue = new PromiseQueue() -self.onmessage = (event) => { - if (event.data.command === "setContext") { +self.onmessage = (event: GitWorkerEvent) => { + if (event.data.command === "setup") { context = { datasetId: event.data.datasetId, sourcePath: event.data.sourcePath, From 48a0bb794a91cc46ce8269e1332b8d165e25298d Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Wed, 31 Jan 2024 15:33:26 -0800 Subject: [PATCH 08/35] feat(cli): Support for git upload and annexed objects --- cli/src/commands/upload.ts | 2 +- cli/src/deps.ts | 2 + cli/src/worker/git.spec.ts | 24 ++++++ cli/src/worker/git.ts | 172 +++++++++++++++++++++++++++++++++---- 4 files changed, 183 insertions(+), 17 deletions(-) create mode 100644 cli/src/worker/git.spec.ts diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 14d439c9d..a0d9869d4 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -76,7 +76,7 @@ export async function uploadAction( "sourcePath": dataset_directory_abs, "repoPath": repoPath, 
"repoEndpoint": `${clientConfig.url}/git/${endpoint}/${datasetId}`, - "authorization": `Bearer ${token}`, + "authorization": token, "logLevel": logger.levelName, }) diff --git a/cli/src/deps.ts b/cli/src/deps.ts index 7e76063dc..b4d3c3f09 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -39,6 +39,8 @@ export { resolve } from "https://deno.land/std@0.212.0/path/resolve.ts" export { relative } from "https://deno.land/std@0.212.0/path/relative.ts" export { join } from "https://deno.land/std@0.212.0/path/join.ts" export { extname } from "https://deno.land/std@0.212.0/path/extname.ts" +export { basename } from "https://deno.land/std@0.212.0/path/basename.ts" +export { dirname } from "https://deno.land/std@0.212.0/path/dirname.ts" // Test suites export { assert, diff --git a/cli/src/worker/git.spec.ts b/cli/src/worker/git.spec.ts new file mode 100644 index 000000000..533cf10ae --- /dev/null +++ b/cli/src/worker/git.spec.ts @@ -0,0 +1,24 @@ +import { annexRelativePath, hashDirLower, hashDirMixed } from "./git.ts" +import { assertEquals } from "../deps.ts" + +Deno.test("annexRelativePath() returns appropriate paths", () => { + assertEquals(annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz"), "../..") +}) + +Deno.test("hashDirLower() returns the correct key prefix", async () => { + assertEquals( + await hashDirLower( + "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz", + ), + ["2ed", "6ea"], + ) +}) + +Deno.test("hashDirMixed() returns the correct key prefix", async () => { + assertEquals( + await hashDirMixed( + "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz", + ), + ["Xk", "Mx"], + ) +}) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index bbff02f95..229caea07 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -1,15 +1,14 @@ -// Might be useful if this is shared by the browser uploader at some point -import "https://deno.land/x/indexeddb@1.3.5/polyfill.ts" 
import git from "npm:isomorphic-git@1.25.3" import http from "npm:isomorphic-git@1.25.3/http/node/index.js" import fs from "node:fs" +import { decode } from "https://deno.land/x/djwt@v3.0.1/mod.ts" import { GitAnnexAttributes, GitAnnexBackend, matchGitAttributes, parseGitAttributes, } from "../gitattributes.ts" -import { extname, join, LevelName } from "../deps.ts" +import { basename, dirname, join, LevelName, relative } from "../deps.ts" import { logger, setupLogging } from "../logger.ts" import { PromiseQueue } from "./queue.ts" /** @@ -29,6 +28,10 @@ interface GitContext { repoEndpoint: string // OpenNeuro git access short lived API key authorization: string + // Author name + name: string + // Author email + email: string } /** @@ -36,7 +39,7 @@ interface GitContext { */ interface GitWorkerEventGeneric { data: { - command: "clone" | "commit" | "done" + command: "clone" | "commit" | "done" | "push" } } @@ -69,18 +72,23 @@ type GitWorkerEvent = let context: GitContext let attributesCache: GitAnnexAttributes +/** + * Paths to upload to the remote annex + */ +const annexKeys: Record = {} + async function done() { await globalThis.close() } -function gitOptions(dir) { +function gitOptions(dir: string) { return { fs, http, dir, url: context.repoEndpoint, headers: { - Authorization: context.authorization, + Authorization: `Bearer ${context.authorization}`, }, } } @@ -155,6 +163,47 @@ async function shouldBeAnnexed( return "SHA256E" } +/** + * git-annex hashDirLower implementation based on https://git-annex.branchable.com/internals/hashing/ + * Compute the directory path from a git-annex filename + */ +export async function hashDirLower( + annexKey: string, +): Promise<[string, string]> { + const computeMD5 = await createMD5() + computeMD5.init() + computeMD5.update(annexKey) + const digest = computeMD5.digest("hex") + return [digest.slice(0, 3), digest.slice(3, 6)] +} + +/** + * Return the relative path to the .git/annex directory from a repo relative path + * + * 
Used for symlink path creation + */ +export function annexRelativePath(path: string) { + return relative(dirname(join("/", path)), "/") +} + +/** + * git-annex hashDirMixed implementation based on https://git-annex.branchable.com/internals/hashing/ + */ +export async function hashDirMixed( + annexKey: string, +): Promise<[string, string]> { + const computeMD5 = await createMD5() + computeMD5.init() + computeMD5.update(annexKey) + const digest = computeMD5.digest("binary") + const firstWord = new DataView(digest.buffer).getUint32(0, true) + const nums = Array.from({ length: 4 }, (_, i) => (firstWord >> (6 * i)) & 31) + const letters = nums.map( + (num) => "0123456789zqjxkmvwgpfZQJXKMVWGPF".charAt(num), + ) + return [`${letters[1]}${letters[0]}`, `${letters[3]}${letters[2]}`] +} + /** * git-annex add equivalent */ @@ -164,7 +213,6 @@ async function add(event: GitWorkerEventAdd) { event.data.relativePath, size, ) - console.log(event.data.path, annexed) if (annexed === "GIT") { // Simple add case const options = { @@ -175,16 +223,18 @@ async function add(event: GitWorkerEventAdd) { // Copy non-annexed files for git index creation await fs.promises.copyFile(event.data.path, targetPath) await git.add(options) - logger.info(`Added ${event.data.relativePath}`) + logger.info(`Add\t${event.data.relativePath}`) } else { - // Compute hash and add link + // E in the backend means include the file extension + let extension = "" + if (annexed.endsWith("E")) { + const filename = basename(event.data.relativePath) + extension = filename.substring(filename.indexOf(".")) + } + // Compute hash const computeHash = annexed.startsWith("MD5") ? await createMD5() : await createSHA256() - // E in the backend means include the file extension - const extension = annexed.endsWith("E") - ? 
extname(event.data.relativePath) - : "" computeHash.init() const stream = fs.createReadStream(event.data.path, { highWaterMark: 1024 * 1024 * 10, @@ -193,20 +243,106 @@ async function add(event: GitWorkerEventAdd) { computeHash.update(data) } const digest = computeHash.digest("hex") - const annexKey = `${annexed}-${size}--${digest}${extension}` - console.log(annexKey) + const annexKey = `${annexed}-s${size}--${digest}${extension}` + const annexPath = join( + ".git", + "annex", + "objects", + ...(await hashDirMixed(annexKey)), + annexKey, + annexKey, + ) + // Path to this file in our repo + const fileRepoPath = join(context.repoPath, event.data.relativePath) + + let link + let forceAdd = false + try { + // Test if the repo already has this object + link = await fs.promises.readlink(fileRepoPath) + } catch (_err) { + forceAdd = true + } + + // Calculate the relative symlinks for our file + const symlinkTarget = join( + annexRelativePath(event.data.relativePath), + annexPath, + ) + + // Key has changed if the existing link points to another object + if (forceAdd || link !== symlinkTarget) { + // Upload this key after the git commit + annexKeys[annexKey] = event.data.path + // This object has a new annex hash, update the symlink and add it + const symlinkTarget = join( + annexRelativePath(event.data.relativePath), + annexPath, + ) + // Verify parent directories exist + await fs.promises.mkdir(dirname(fileRepoPath), { recursive: true }) + // Remove the existing symlink or git file + await fs.promises.rm(fileRepoPath, { force: true }) + // Create our new symlink pointing at the right annex object + await fs.promises.symlink(symlinkTarget, fileRepoPath) + const options = { + ...gitOptions(context.repoPath), + filepath: event.data.relativePath, + } + await git.add(options) + logger.info(`Annexed\t${event.data.relativePath}`) + } else { + logger.info(`Unchanged\t${event.data.relativePath}`) + } } } +/** + * Git repo specific token + */ +interface OpenNeuroGitToken { + sub: 
string + email: string + provider: string + name: string + admin: boolean + scopes: [string] + dataset: string + iat: number + exp: number +} + /** * `git commit` equivalent */ async function commit() { - console.log("Commit goes here") + const options = gitOptions(context.repoPath) + const decodedToken = decode(context.authorization) + const { email, name } = decodedToken[1] as OpenNeuroGitToken + const commitHash = await git.commit({ + ...options, + author: { + name, + email, + }, + message: "[OpenNeuro] Added local files", + }) + logger.info(`Committed as "${commitHash}"`) +} + +/** + * `git push` and `git-annex copy --to=openneuro` + */ +async function push() { + await git.push( + gitOptions(context.repoPath), + ) } +// Queue of tasks to perform in order const workQueue = new PromiseQueue() +// @ts-ignore Expected for workers self.onmessage = (event: GitWorkerEvent) => { if (event.data.command === "setup") { context = { @@ -215,6 +351,8 @@ self.onmessage = (event: GitWorkerEvent) => { repoPath: event.data.repoPath, repoEndpoint: event.data.repoEndpoint, authorization: event.data.authorization, + name: event.data.name, + email: event.data.email, } setupLogging(event.data.logLevel) } else if (event.data.command === "clone") { @@ -223,6 +361,8 @@ self.onmessage = (event: GitWorkerEvent) => { workQueue.enqueue(add, event) } else if (event.data.command === "commit") { workQueue.enqueue(commit) + } else if (event.data.command === "push") { + workQueue.enqueue(push) } else if (event.data.command === "done") { workQueue.enqueue(done) } From 3436777fcf8ef4a0744380859454d3e5cc4db7c6 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Wed, 7 Feb 2024 14:04:20 -0800 Subject: [PATCH 09/35] feat(cli): Run bids-validator and passthrough any options. 
--- cli/src/bids_validator.ts | 5 +++++ cli/src/commands/upload.ts | 23 ++++++++++++++++++++--- deno.json | 3 +++ 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 cli/src/bids_validator.ts diff --git a/cli/src/bids_validator.ts b/cli/src/bids_validator.ts new file mode 100644 index 000000000..24becc495 --- /dev/null +++ b/cli/src/bids_validator.ts @@ -0,0 +1,5 @@ +// TODO - Switch to upstream after next release +export { validateCommand } from "https://raw.githubusercontent.com/bids-standard/bids-validator/master/bids-validator/src/setup/options.ts" +export { validate } from "https://deno.land/x/bids_validator@v1.14.0/main.ts" +export { readFileTree } from "https://deno.land/x/bids_validator@v1.14.0/files/deno.ts" +export { consoleFormat } from "https://deno.land/x/bids_validator@v1.14.0/utils/output.ts" diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index a0d9869d4..46dffba99 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -1,4 +1,9 @@ -import { validateCommand } from "./validate.ts" +import { + consoleFormat, + readFileTree, + validate, + validateCommand, +} from "../bids_validator.ts" import { ClientConfig, getConfig } from "./login.ts" import { logger } from "../logger.ts" import { Confirm, join, ProgressBar, relative, resolve, walk } from "../deps.ts" @@ -42,7 +47,19 @@ export async function uploadAction( `upload ${dataset_directory} resolved to ${dataset_directory_abs}`, ) - // TODO - call the validator here + const schemaResult = await validate( + await readFileTree(dataset_directory_abs), + options, + ) + console.log(consoleFormat(schemaResult)) + + for (const issue of schemaResult.issues.values()) { + if (issue.severity === "error") { + console.log("Please correct any errors before uploading.") + return + } + } + console.log("Validation complete, preparing upload.") let datasetId = "ds001130" if (options.dataset) { @@ -87,7 +104,7 @@ export async function uploadAction( }) 
progressBar.render(0)*/ - console.log(join(repoDir, datasetId)) + logger.info(`Repo path: ${join(repoDir, datasetId)}`) worker.postMessage({ "command": "clone", }) diff --git a/deno.json b/deno.json index 3e151e5ac..562d295a9 100644 --- a/deno.json +++ b/deno.json @@ -12,5 +12,8 @@ "services/datalad/tests/.pytest_cache", "**/__pycache/**" ] + }, + "imports": { + "std/": "https://deno.land/std@0.214.0/" } } From 1b126a64e760dc75f6581992ce83bee4ef531b3e Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Wed, 7 Feb 2024 15:18:47 -0800 Subject: [PATCH 10/35] feat(cli): Enable git push for uploads --- cli/src/commands/upload.ts | 5 +++- cli/src/worker/git.ts | 50 ++++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 46dffba99..64a9f0a40 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -55,7 +55,7 @@ export async function uploadAction( for (const issue of schemaResult.issues.values()) { if (issue.severity === "error") { - console.log("Please correct any errors before uploading.") + console.log("Please correct validation errors before uploading.") return } } @@ -127,6 +127,9 @@ export async function uploadAction( // Generate a commit worker.postMessage({ command: "commit" }) + // Push git/annexed data + worker.postMessage({ command: "push" }) + // Close after all tasks are queued worker.postMessage({ command: "close" }) } diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 229caea07..28d394040 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -1,4 +1,4 @@ -import git from "npm:isomorphic-git@1.25.3" +import git, { STAGE, TREE } from "npm:isomorphic-git@1.25.3" import http from "npm:isomorphic-git@1.25.3/http/node/index.js" import fs from "node:fs" import { decode } from "https://deno.land/x/djwt@v3.0.1/mod.ts" @@ -319,15 +319,51 @@ async function commit() { const options = gitOptions(context.repoPath) const 
decodedToken = decode(context.authorization) const { email, name } = decodedToken[1] as OpenNeuroGitToken - const commitHash = await git.commit({ + let generateCommit = false + let changes = 0 + const tree = await git.walk({ ...options, - author: { - name, - email, + trees: [TREE({ ref: "HEAD" }), STAGE()], + map: async function (filepath, [A, B]) { + if (await A?.type() === "blob" || await B?.type() === "blob") { + const Aoid = await A.oid() + const Boid = await B.oid() + let type = "equal" + if (Aoid !== Boid) { + logger.info(`modified:\t${filepath}`) + type = "modify" + } + if (Aoid === undefined) { + logger.info(`new file:\t${filepath}`) + type = "add" + } + if (Boid === undefined) { + logger.info(`deleted:\t${filepath}`) + type = "remove" + } + if (type !== "equal") { + generateCommit = true + changes += 1 + } + } }, - message: "[OpenNeuro] Added local files", }) - logger.info(`Committed as "${commitHash}"`) + if (generateCommit) { + console.log( + `Detected ${changes} change${changes === 1 ? 
"" : "s"}.`, + ) + const commitHash = await git.commit({ + ...options, + author: { + name, + email, + }, + message: "[OpenNeuro] Added local files", + }) + logger.info(`Committed as "${commitHash}"`) + } else { + console.log("No changes found, not uploading.") + } } /** From 1ca7ff94d9b1efa3adf1899465197b81cad74131 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Thu, 8 Feb 2024 12:59:50 -0800 Subject: [PATCH 11/35] feat(cli): Allow upload of annexed objects and port transferKey to Deno --- cli/src/worker/git.ts | 34 +++++++++- cli/src/worker/transferKey.ts | 117 ++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 cli/src/worker/transferKey.ts diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 28d394040..cb69f12ac 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -11,6 +11,8 @@ import { import { basename, dirname, join, LevelName, relative } from "../deps.ts" import { logger, setupLogging } from "../logger.ts" import { PromiseQueue } from "./queue.ts" +import { checkKey, storeKey } from "./transferKey.ts" + /** * Why are we using hash wasm over web crypto? * Web crypto cannot do streaming hashes of the common git-annex functions yet. 
@@ -326,8 +328,8 @@ async function commit() { trees: [TREE({ ref: "HEAD" }), STAGE()], map: async function (filepath, [A, B]) { if (await A?.type() === "blob" || await B?.type() === "blob") { - const Aoid = await A.oid() - const Boid = await B.oid() + const Aoid = await A?.oid() + const Boid = await B?.oid() let type = "equal" if (Aoid !== Boid) { logger.info(`modified:\t${filepath}`) @@ -363,6 +365,7 @@ async function commit() { logger.info(`Committed as "${commitHash}"`) } else { console.log("No changes found, not uploading.") + self.close() } } @@ -370,6 +373,33 @@ async function commit() { * `git push` and `git-annex copy --to=openneuro` */ async function push() { + // Git-annex copy --to=openneuro + for (const [key, path] of Object.entries(annexKeys)) { + const checkKeyResult = await checkKey({ + url: context.repoEndpoint, + token: context.authorization, + }, key) + if (checkKeyResult) { + logger.info(`Skipping key "${key}" present on remote`) + } else { + const storeKeyResult = await storeKey( + { + url: context.repoEndpoint, + token: context.authorization, + }, + key, + path, + ) + if (storeKeyResult === -1) { + logger.error(`Failed to transfer annex object "${key}"`) + } else { + logger.info( + `Stored ${storeKeyResult} bytes for key "${key}" from path "${path}"`, + ) + } + } + } + // Git push await git.push( gitOptions(context.repoPath), ) diff --git a/cli/src/worker/transferKey.ts b/cli/src/worker/transferKey.ts new file mode 100644 index 000000000..124923bba --- /dev/null +++ b/cli/src/worker/transferKey.ts @@ -0,0 +1,117 @@ +import * as base64 from "std/encoding/base64.ts" + +/** Deno port of transferKey from Node.js CLI */ +interface TransferKeyState { + // Base URL + url: string + // Basic auth token for repos + token: string +} + +/** + * Create a Request object for this url and key + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key git-annex key + * 
@param {object} options fetch options + * @returns {Request} Configured fetch Request object + */ +export function keyRequest(state: TransferKeyState, key: string, options) { + const headers = new Headers( + "headers" in options && options.headers || undefined, + ) + headers.set( + "Authorization", + "Basic " + base64.encodeBase64(`openneuro-cli:${state.token}`), + ) + const requestUrl = `${state.url}/annex/${key}` + return new Request(requestUrl, { ...options, headers }) +} + +/** + * Call POST to upload a key to a remote + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key Git-annex key + * @param {string} file File path + */ +export async function storeKey(state: TransferKeyState, key: string, file) { + const fileHandle = await Deno.open(file) + const fileStat = await fileHandle.stat() + const requestOptions = { + method: "POST", + headers: { + "Content-Length": fileStat.size, + }, + } + const request = keyRequest(state, key, requestOptions) + const response = await fetch(request, { body: fileHandle.readable }) + if (response.status === 200) { + return fileStat.size + } else { + return -1 + } +} + +/** + * Call GET to download a key from a remote + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key Git-annex key + * @param {string} file File path + */ +export async function retrieveKey(state: TransferKeyState, key: string, file) { + try { + const request = keyRequest(state, key, { method: "GET" }) + const response = await fetch(request) + if (response.status === 200 && response.body) { + const fileHandle = await Deno.open(file, { write: true, create: true }) + await response.body.pipeTo(fileHandle.writable) + return true + } else { + return false + } + } catch (err) { + console.error(err) + return false + } +} + +/** + * Call HEAD to check if key exists on remote + * @param 
{object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key + * @returns {Promise} True or false if key exists + */ +export async function checkKey(state: TransferKeyState, key: string) { + const request = keyRequest(state, key, { method: "HEAD" }) + const response = await fetch(request) + if (response.status === 200) { + return true + } else { + return false + } +} + +/** + * Call DELETE to remove a key from the remote + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key + * @returns {Promise} True or false if key exists + */ +export async function removeKey(state: TransferKeyState, key: string) { + const request = keyRequest(state, key, { method: "DELETE" }) + const response = await fetch(request) + if (response.status === 204) { + return true + } else { + return false + } +} From 5bb084208dacf9a6071cf29cb200354bc44d6f79 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 13 Feb 2024 09:58:54 -0800 Subject: [PATCH 12/35] feat(cli): Config refactor and download support started --- cli/src/commands/download.ts | 8 ++++++ cli/src/commands/git-credential.ts | 2 +- cli/src/commands/login.ts | 26 -------------------- cli/src/commands/upload.ts | 15 +----------- cli/src/config.ts | 39 ++++++++++++++++++++++++++++++ cli/src/graphq.ts | 2 +- 6 files changed, 50 insertions(+), 42 deletions(-) create mode 100644 cli/src/config.ts diff --git a/cli/src/commands/download.ts b/cli/src/commands/download.ts index d60e4e255..b5c55ea10 100644 --- a/cli/src/commands/download.ts +++ b/cli/src/commands/download.ts @@ -1,6 +1,14 @@ import { Command } from "../deps.ts" +import { readConfig } from "../config.ts" export const download = new Command() .name("download") .description("Download a dataset from OpenNeuro") .arguments(" ") + +export async function downloadAction( + options: CommandOptions, + 
dataset_directory: string, +) { + const clientConfig = readConfig() +} diff --git a/cli/src/commands/git-credential.ts b/cli/src/commands/git-credential.ts index d9410361d..8fd178dbe 100644 --- a/cli/src/commands/git-credential.ts +++ b/cli/src/commands/git-credential.ts @@ -1,5 +1,5 @@ import { Command, TextLineStream } from "../deps.ts" -import { getConfig } from "./login.ts" +import { getConfig } from "../config.ts" const prepareRepoAccess = ` mutation prepareRepoAccess($datasetId: ID!) { diff --git a/cli/src/commands/login.ts b/cli/src/commands/login.ts index ddcc7df70..d94b8ae54 100644 --- a/cli/src/commands/login.ts +++ b/cli/src/commands/login.ts @@ -3,13 +3,6 @@ */ import { Command, Confirm, Secret, Select } from "../deps.ts" import type { CommandOptions } from "../deps.ts" -import { LoginError } from "../error.ts" - -export interface ClientConfig { - url: string - token: string - errorReporting: boolean -} const messages = { url: @@ -19,25 +12,6 @@ const messages = { "Enable error reporting. Errors and performance metrics are sent to the configured OpenNeuro instance.", } -/** - * Get credentials from local storage - */ -export function getConfig(): ClientConfig { - const url = localStorage.getItem("url") - const token = localStorage.getItem("token") - const errorReporting = localStorage.getItem("errorReporting") === "true" - if (url && token && errorReporting) { - const config: ClientConfig = { - url, - token, - errorReporting, - } - return config - } else { - throw new LoginError("Run `openneuro login` before upload.") - } -} - export async function loginAction(options: CommandOptions) { const url = options.url ? 
options.url : await Select.prompt({ message: "Choose an OpenNeuro instance to use.", diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 64a9f0a40..57cd101f7 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -4,24 +4,11 @@ import { validate, validateCommand, } from "../bids_validator.ts" -import { ClientConfig, getConfig } from "./login.ts" import { logger } from "../logger.ts" import { Confirm, join, ProgressBar, relative, resolve, walk } from "../deps.ts" import type { CommandOptions } from "../deps.ts" import { getRepoAccess } from "./git-credential.ts" - -export function readConfig(): ClientConfig { - const config = getConfig() - logger.info( - `configured with URL "${config.url}" and token "${ - config.token.slice( - 0, - 4, - ) - }...${config.token.slice(-4)}"`, - ) - return config -} +import { readConfig } from "../config.ts" async function getRepoDir(url: URL): Promise { const LOCAL_STORAGE_KEY = `openneuro_cli_${url.hostname}_` diff --git a/cli/src/config.ts b/cli/src/config.ts new file mode 100644 index 000000000..b782a8f9d --- /dev/null +++ b/cli/src/config.ts @@ -0,0 +1,39 @@ +import { LoginError } from "./error.ts" + +export interface ClientConfig { + url: string + token: string + errorReporting: boolean +} + +/** + * Get credentials from local storage + */ +export function getConfig(): ClientConfig { + const url = localStorage.getItem("url") + const token = localStorage.getItem("token") + const errorReporting = localStorage.getItem("errorReporting") === "true" + if (url && token && errorReporting) { + const config: ClientConfig = { + url, + token, + errorReporting, + } + return config + } else { + throw new LoginError("Run `openneuro login` before upload.") + } +} + +export function readConfig(): ClientConfig { + const config = getConfig() + logger.info( + `configured with URL "${config.url}" and token "${ + config.token.slice( + 0, + 4, + ) + }...${config.token.slice(-4)}"`, + ) + return config +} diff 
--git a/cli/src/graphq.ts b/cli/src/graphq.ts index 61e21c9da..fc5ed147b 100644 --- a/cli/src/graphq.ts +++ b/cli/src/graphq.ts @@ -2,7 +2,7 @@ * Minimalist OpenNeuro client with no dependencies */ -import { getConfig } from "./commands/login.ts" +import { getConfig } from "./config.ts" import { QueryError } from "./error.ts" function request(query: string, variables = {}): Promise { From 51ef0409d77dd7930c3963ece7858948a6303d0a Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 4 Mar 2024 12:04:55 -0800 Subject: [PATCH 13/35] feat(cli): Implement download for git objects --- cli/src/commands/download.ts | 40 ++++++++++++++++++++++++++++++++++-- cli/src/config.ts | 1 + cli/src/options.ts | 2 ++ cli/src/worker/git.ts | 2 -- 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/cli/src/commands/download.ts b/cli/src/commands/download.ts index b5c55ea10..1281a0073 100644 --- a/cli/src/commands/download.ts +++ b/cli/src/commands/download.ts @@ -1,14 +1,50 @@ import { Command } from "../deps.ts" import { readConfig } from "../config.ts" +import { logger } from "../logger.ts" +import { getRepoAccess } from "./git-credential.ts" export const download = new Command() .name("download") .description("Download a dataset from OpenNeuro") - .arguments(" ") + .arguments(" ") + .option( + "-d, --draft", + "Download a draft instead of the latest version snapshot.", + ) + .option( + "-v, --version", + "Download a specific version.", + ) + .action(downloadAction) export async function downloadAction( options: CommandOptions, - dataset_directory: string, + accession_number: string, + download_directory: string, ) { + const datasetId = accession_number const clientConfig = readConfig() + const { token, endpoint } = await getRepoAccess(datasetId) + + // Create the git worker + const worker = new Worker(new URL("../worker/git.ts", import.meta.url).href, { + type: "module", + }) + + // Configure worker + worker.postMessage({ + "command": "setup", + "datasetId": datasetId, 
+ "repoPath": download_directory, + "repoEndpoint": `${clientConfig.url}/git/${endpoint}/${datasetId}`, + "authorization": token, + "logLevel": logger.levelName, + }) + + worker.postMessage({ + "command": "clone", + }) + + // Close after all tasks are queued + worker.postMessage({ command: "close" }) } diff --git a/cli/src/config.ts b/cli/src/config.ts index b782a8f9d..c3486dcbc 100644 --- a/cli/src/config.ts +++ b/cli/src/config.ts @@ -1,3 +1,4 @@ +import { logger } from "./logger.ts" import { LoginError } from "./error.ts" export interface ClientConfig { diff --git a/cli/src/options.ts b/cli/src/options.ts index 793ec45cd..67d59949a 100644 --- a/cli/src/options.ts +++ b/cli/src/options.ts @@ -9,6 +9,7 @@ import { import { setupLogging } from "./logger.ts" import { login } from "./commands/login.ts" import { upload } from "./commands/upload.ts" +import { download } from "./commands/download.ts" import { gitCredential } from "./commands/git-credential.ts" export type OpenNeuroOptions = { @@ -30,6 +31,7 @@ const openneuroCommand = new Command() setupLogging(log ? 
log : "ERROR") }) .command("login", login) + .command("download", download) .command("upload", upload) .command("git-credential", gitCredential) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index cb69f12ac..de30bf4d3 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -112,8 +112,6 @@ async function update() { ) await git.clone({ ...options, - singleBranch: true, - depth: 1, }) } logger.info(`${context.datasetId} draft fetched!`) From 7f1f2eea959151c463837b7272140af2920906f1 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Wed, 13 Mar 2024 12:29:49 -0700 Subject: [PATCH 14/35] fix(cli): Rework defaced/consent checks and fix create dataset API call --- cli/src/commands/upload.ts | 68 +++++++++++++++++++++++++++++++++----- cli/src/deps.ts | 1 + cli/src/graphq.ts | 2 +- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 57cd101f7..ebcaa8f9d 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -5,10 +5,19 @@ import { validateCommand, } from "../bids_validator.ts" import { logger } from "../logger.ts" -import { Confirm, join, ProgressBar, relative, resolve, walk } from "../deps.ts" +import { + Confirm, + join, + ProgressBar, + prompt, + relative, + resolve, + walk, +} from "../deps.ts" import type { CommandOptions } from "../deps.ts" import { getRepoAccess } from "./git-credential.ts" import { readConfig } from "../config.ts" +import { createDataset } from "../graphq.ts" async function getRepoDir(url: URL): Promise { const LOCAL_STORAGE_KEY = `openneuro_cli_${url.hostname}_` @@ -48,21 +57,54 @@ export async function uploadAction( } console.log("Validation complete, preparing upload.") - let datasetId = "ds001130" + let datasetId if (options.dataset) { datasetId = options.dataset } else { if (!options.create) { - const confirmation = await new Confirm( - "Confirm creation of a new dataset?", - ) - if (!confirmation) { + const confirmation = 
await prompt([ + { + name: "create", + message: "Create a new dataset?", + type: Confirm, + }, + ]) + if (!confirmation.create) { console.log("Specify --dataset to upload to an existing dataset.") return } } - // TODO Create dataset here - datasetId = "ds001130" + let affirmedDefaced = options.affirmDefaced + let affirmedConsent = options.affirmConsent + if (affirmedDefaced || affirmedConsent) { + datasetId = await createDataset(affirmedDefaced, affirmedConsent) + } else { + console.log("confirm") + const affirmed = await prompt([ + { + name: "affirmedDefaced", + message: + "All structural scans have been defaced, obscuring any tissue on or near the face that could potentially be used to reconstruct the facial structure.", + type: Confirm, + }, + { + name: "affirmedConsent", + message: + "I have explicit participant consent and ethical authorization to publish structural scans without defacing.", + type: Confirm, + }, + ]) + affirmedDefaced = affirmed.affirmedDefaced + affirmedConsent = affirmed.affirmedConsent + if (affirmedDefaced || affirmedConsent) { + datasetId = await createDataset(affirmedDefaced, affirmedConsent) + } else { + console.log( + "You must affirm defacing or consent to upload without defacing to continue.", + ) + return + } + } } // Create the git worker @@ -138,4 +180,14 @@ export const upload = validateCommand .option("-c, --create", "Skip confirmation to create a new dataset.", { conflicts: ["dataset"], }) + .option( + "--affirmDefaced", + "All structural scans have been defaced, obscuring any tissue on or near the face that could potentially be used to reconstruct the facial structure.", + { default: false }, + ) + .option( + "--affirmConsent", + "I have explicit participant consent and ethical authorization to publish structural scans without defacing.", + { default: false }, + ) .action(uploadAction) diff --git a/cli/src/deps.ts b/cli/src/deps.ts index b4d3c3f09..3f13128ba 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -5,6 +5,7 @@ 
export { } from "https://deno.land/x/cliffy@v1.0.0-rc.3/command/mod.ts" export { Confirm, + prompt, Secret, Select, } from "https://deno.land/x/cliffy@v1.0.0-rc.3/prompt/mod.ts" diff --git a/cli/src/graphq.ts b/cli/src/graphq.ts index fc5ed147b..a165d32ce 100644 --- a/cli/src/graphq.ts +++ b/cli/src/graphq.ts @@ -7,7 +7,7 @@ import { QueryError } from "./error.ts" function request(query: string, variables = {}): Promise { const config = getConfig() - return fetch(config.url, { + return fetch(`${config.url}/crn/graphql`, { method: "POST", headers: { Authorization: `Bearer ${config.token}`, From 1ecc8b112950ed5b10a7446a181d706aec661f45 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 18 Mar 2024 11:28:37 -0700 Subject: [PATCH 15/35] fix(cli): Rename git test suite --- cli/src/worker/{git.spec.ts => git.test.ts} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cli/src/worker/{git.spec.ts => git.test.ts} (100%) diff --git a/cli/src/worker/git.spec.ts b/cli/src/worker/git.test.ts similarity index 100% rename from cli/src/worker/git.spec.ts rename to cli/src/worker/git.test.ts From 52a9dba31a32892469b61ad2c618b78f6a964183 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 18 Mar 2024 11:49:05 -0700 Subject: [PATCH 16/35] fix(cli): Improve tests for deno CLI --- cli/src/deps.ts | 1 + cli/src/worker/queue.test.ts | 23 +++++++++++++++++++++++ cli/src/worker/transferKey.ts | 31 +++++++++++++++++++++++++------ deno.json | 4 ++-- 4 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 cli/src/worker/queue.test.ts diff --git a/cli/src/deps.ts b/cli/src/deps.ts index 3f13128ba..fb4216d8e 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -57,3 +57,4 @@ export { export { default as ProgressBar } from "https://deno.land/x/progress@v1.3.9/mod.ts" // Ignore library export { default as ignore } from "npm:ignore@5.3.0" +export { encodeBase64 } from "https://deno.land/std@0.220.1/encoding/base64.ts" diff --git a/cli/src/worker/queue.test.ts 
b/cli/src/worker/queue.test.ts new file mode 100644 index 000000000..2021831a1 --- /dev/null +++ b/cli/src/worker/queue.test.ts @@ -0,0 +1,23 @@ +import { assertEquals } from "../deps.ts" +import { PromiseQueue } from "./queue.ts" + +Deno.test("PromiseQueue should execute promises in order", async () => { + const order: number[] = [] + const promiseQueue = new PromiseQueue() + + promiseQueue.enqueue(async () => { + await new Promise((resolve) => setTimeout(resolve, 10)) + order.push(1) + }) + promiseQueue.enqueue(async () => { + await new Promise((resolve) => setTimeout(resolve, 5)) + order.push(2) + }) + promiseQueue.enqueue(async () => { + order.push(3) + }) + + await new Promise((resolve) => setTimeout(resolve, 20)) + + assertEquals(order, [1, 2, 3]) +}) diff --git a/cli/src/worker/transferKey.ts b/cli/src/worker/transferKey.ts index 124923bba..741244d69 100644 --- a/cli/src/worker/transferKey.ts +++ b/cli/src/worker/transferKey.ts @@ -1,6 +1,7 @@ -import * as base64 from "std/encoding/base64.ts" +import { encodeBase64 } from "../deps.ts" /** Deno port of transferKey from Node.js CLI */ + interface TransferKeyState { // Base URL url: string @@ -8,6 +9,12 @@ interface TransferKeyState { token: string } +interface FetchOptions { + method?: "GET" | "POST" | "PUT" | "DELETE" | string + headers?: { [key: string]: string } | Headers // Key-value pairs for request headers + body?: BodyInit +} + /** * Create a Request object for this url and key * @param {object} state @@ -17,13 +24,17 @@ interface TransferKeyState { * @param {object} options fetch options * @returns {Request} Configured fetch Request object */ -export function keyRequest(state: TransferKeyState, key: string, options) { +export function keyRequest( + state: TransferKeyState, + key: string, + options: FetchOptions, +) { const headers = new Headers( "headers" in options && options.headers || undefined, ) headers.set( "Authorization", - "Basic " + base64.encodeBase64(`openneuro-cli:${state.token}`), + 
"Basic " + encodeBase64(`openneuro-cli:${state.token}`), ) const requestUrl = `${state.url}/annex/${key}` return new Request(requestUrl, { ...options, headers }) @@ -37,13 +48,17 @@ export function keyRequest(state: TransferKeyState, key: string, options) { * @param {string} key Git-annex key * @param {string} file File path */ -export async function storeKey(state: TransferKeyState, key: string, file) { +export async function storeKey( + state: TransferKeyState, + key: string, + file: string, +) { const fileHandle = await Deno.open(file) const fileStat = await fileHandle.stat() const requestOptions = { method: "POST", headers: { - "Content-Length": fileStat.size, + "Content-Length": fileStat.size.toString(), }, } const request = keyRequest(state, key, requestOptions) @@ -63,7 +78,11 @@ export async function storeKey(state: TransferKeyState, key: string, file) { * @param {string} key Git-annex key * @param {string} file File path */ -export async function retrieveKey(state: TransferKeyState, key: string, file) { +export async function retrieveKey( + state: TransferKeyState, + key: string, + file: string, +) { try { const request = keyRequest(state, key, { method: "GET" }) const response = await fetch(request) diff --git a/deno.json b/deno.json index 562d295a9..73c2e4075 100644 --- a/deno.json +++ b/deno.json @@ -13,7 +13,7 @@ "**/__pycache/**" ] }, - "imports": { - "std/": "https://deno.land/std@0.214.0/" + "tasks": { + "tests": "deno test cli/" } } From f02b00ffdad5ff18dcee299a763596970db4e4c5 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 18 Mar 2024 13:59:54 -0700 Subject: [PATCH 17/35] tests(cli): Add test coverage for transferKey --- cli/src/commands/upload.test.ts | 0 cli/src/deps.ts | 3 + cli/src/tests/fetch-stub.test.ts | 19 +++ cli/src/tests/fetch-stub.ts | 9 ++ cli/src/worker/transferKey.test.ts | 181 +++++++++++++++++++++++++++++ cli/src/worker/transferKey.ts | 37 +++--- deno.json | 2 +- 7 files changed, 234 insertions(+), 17 deletions(-) 
create mode 100644 cli/src/commands/upload.test.ts create mode 100644 cli/src/tests/fetch-stub.test.ts create mode 100644 cli/src/tests/fetch-stub.ts create mode 100644 cli/src/worker/transferKey.test.ts diff --git a/cli/src/commands/upload.test.ts b/cli/src/commands/upload.test.ts new file mode 100644 index 000000000..e69de29bb diff --git a/cli/src/deps.ts b/cli/src/deps.ts index fb4216d8e..1cc2ddcbc 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -47,9 +47,12 @@ export { assert, assertEquals, assertObjectMatch, + assertStrictEquals, } from "https://deno.land/std@0.212.0/assert/mod.ts" export { + assertSpyCallArgs, assertSpyCalls, + restore, returnsNext, stub, } from "https://deno.land/std@0.212.0/testing/mock.ts" diff --git a/cli/src/tests/fetch-stub.test.ts b/cli/src/tests/fetch-stub.test.ts new file mode 100644 index 000000000..828b05e9d --- /dev/null +++ b/cli/src/tests/fetch-stub.test.ts @@ -0,0 +1,19 @@ +import { assertEquals } from "../deps.ts" +import { assertSpyCallArgs, assertSpyCalls } from "../deps.ts" +import { mockFetch } from "./fetch-stub.ts" + +Deno.test({ + name: "fetch test", + async fn() { + const mockFetchText = "__MOCK_FETCH__" + const fetchStub = mockFetch(new Response(mockFetchText)) + try { + const res = await fetch("foo.com") + assertEquals(await res.text(), mockFetchText) + assertSpyCallArgs(fetchStub, 0, ["foo.com"]) + assertSpyCalls(fetchStub, 1) + } finally { + fetchStub.restore() + } + }, +}) diff --git a/cli/src/tests/fetch-stub.ts b/cli/src/tests/fetch-stub.ts new file mode 100644 index 000000000..61dc8f7e1 --- /dev/null +++ b/cli/src/tests/fetch-stub.ts @@ -0,0 +1,9 @@ +import { stub } from "../deps.ts" + +export function mockFetch(response: Response) { + return stub( + globalThis, + "fetch", + () => Promise.resolve(response), + ) +} diff --git a/cli/src/worker/transferKey.test.ts b/cli/src/worker/transferKey.test.ts new file mode 100644 index 000000000..deaf97371 --- /dev/null +++ b/cli/src/worker/transferKey.test.ts @@ 
-0,0 +1,181 @@ +import type { FetchOptions, TransferKeyState } from "./transferKey.ts" +import { + checkKey, + keyRequest, + removeKey, + retrieveKey, + storeKey, +} from "./transferKey.ts" +import { assertEquals, assertStrictEquals } from "../deps.ts" +import { mockFetch } from "../tests/fetch-stub.ts" + +Deno.test({ + name: "keyRequest() generates correct Request object", + fn() { + const state: TransferKeyState = { + url: "https://api.example.com", + token: "secret_token", + } + const key = "sample_git_annex_key" + const options: FetchOptions = { + method: "POST", + headers: { "Content-Type": "application/json" }, + } + + const result = keyRequest(state, key, options) + + assertEquals(result.method, "POST") + assertEquals( + result.url, + "https://api.example.com/annex/sample_git_annex_key", + ) + assertStrictEquals( + result.headers.get("Authorization"), + "Basic b3Blbm5ldXJvLWNsaTpzZWNyZXRfdG9rZW4=", + ) + assertStrictEquals(result.headers.get("Content-Type"), "application/json") + }, +}) + +Deno.test({ + name: "storeKey() uploads successfully", + async fn() { + const mocked = mockFetch(new Response("", { status: 200 })) + const testData = "test data here" + const tmpFilePath = await Deno.makeTempFile() + const textEncoder = new TextEncoder() + await Deno.writeFile(tmpFilePath, textEncoder.encode(testData)) + + try { + const testFileSize = testData.length + + const result = await storeKey( + { url: "http://localhost", token: "" }, + "key", + tmpFilePath, + ) + assertEquals(result, testFileSize) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "storeKey() handles upload failure", + async fn() { + const mocked = mockFetch(new Response("", { status: 500 })) + + try { + const result = await storeKey( + { url: "http://localhost", token: "" }, + "key", + "./deno.json", + ) + assertEquals(result, -1) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "retrieveKey() downloads successfully", + async fn() { + const testData = "test 
data here" + const tmpFilePath = await Deno.makeTempFile() + const mocked = mockFetch(new Response(testData, { status: 200 })) + + try { + const result = await retrieveKey( + { url: "http://localhost", token: "" }, + "key", + tmpFilePath, + ) + assertEquals(result, true) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "retrieveKey() handles download failure (e.g., 404)", + async fn() { + const mocked = mockFetch(new Response("", { status: 404 })) + try { + const result = await retrieveKey( + { url: "http://localhost", token: "" }, + "key", + "output.file", + ) + assertEquals(result, false) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "checkKey() confirms key exists (status 200)", + async fn() { + const mocked = mockFetch(new Response("", { status: 200 })) + try { + const result = await checkKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, true) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "checkKey() identifies when key doesn't exist", + async fn() { + const mocked = mockFetch(new Response("", { status: 404 })) + try { + const result = await checkKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, false) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "removeKey() successful deletion (status 204)", + async fn() { + const mocked = mockFetch(new Response(null, { status: 204 })) + try { + const result = await removeKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, true) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "removeKey() handles failed deletion", + async fn() { + const mocked = mockFetch(new Response("", { status: 500 })) + try { + const result = await removeKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, false) + } finally { + mocked.restore() + } + }, +}) diff --git a/cli/src/worker/transferKey.ts 
b/cli/src/worker/transferKey.ts index 741244d69..c31a5a18a 100644 --- a/cli/src/worker/transferKey.ts +++ b/cli/src/worker/transferKey.ts @@ -2,14 +2,14 @@ import { encodeBase64 } from "../deps.ts" /** Deno port of transferKey from Node.js CLI */ -interface TransferKeyState { +export interface TransferKeyState { // Base URL url: string // Basic auth token for repos token: string } -interface FetchOptions { +export interface FetchOptions { method?: "GET" | "POST" | "PUT" | "DELETE" | string headers?: { [key: string]: string } | Headers // Key-value pairs for request headers body?: BodyInit @@ -53,20 +53,25 @@ export async function storeKey( key: string, file: string, ) { - const fileHandle = await Deno.open(file) - const fileStat = await fileHandle.stat() - const requestOptions = { - method: "POST", - headers: { - "Content-Length": fileStat.size.toString(), - }, - } - const request = keyRequest(state, key, requestOptions) - const response = await fetch(request, { body: fileHandle.readable }) - if (response.status === 200) { - return fileStat.size - } else { - return -1 + let fileHandle + try { + fileHandle = await Deno.open(file) + const fileStat = await fileHandle.stat() + const requestOptions = { + method: "POST", + headers: { + "Content-Length": fileStat.size.toString(), + }, + } + const request = keyRequest(state, key, requestOptions) + const response = await fetch(request, { body: fileHandle.readable }) + if (response.status === 200) { + return fileStat.size + } else { + return -1 + } + } finally { + fileHandle?.close() } } diff --git a/deno.json b/deno.json index 73c2e4075..54553baa5 100644 --- a/deno.json +++ b/deno.json @@ -14,6 +14,6 @@ ] }, "tasks": { - "tests": "deno test cli/" + "tests": "deno test --allow-read --allow-write cli/" } } From bc6d061e13ef244f522add26963551aa33b9f411 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 18 Mar 2024 15:52:44 -0700 Subject: [PATCH 18/35] tests(cli): Add a test for git add/git commit with files --- 
cli/src/commands/upload.ts | 41 +++++++---- cli/src/commands/validate.ts | 28 -------- cli/src/deps.ts | 3 + cli/src/worker/git.test.ts | 128 ++++++++++++++++++++++++++++++++++- cli/src/worker/git.ts | 13 ++-- 5 files changed, 166 insertions(+), 47 deletions(-) delete mode 100644 cli/src/commands/validate.ts diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index ebcaa8f9d..f60411d23 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -31,6 +31,31 @@ async function getRepoDir(url: URL): Promise { } } +/** + * Add all files to a setup git worker + * @param worker The worker to use for this + * @param dataset_directory_abs An absolute path on the local system to upload files from (dataset root) + */ +export async function addGitFiles( + worker: Worker, + dataset_directory_abs: string, +) { + // Upload all files + for await ( + const walkEntry of walk(dataset_directory_abs, { + includeDirs: false, + includeSymlinks: false, + }) + ) { + const relativePath = relative(dataset_directory_abs, walkEntry.path) + worker.postMessage({ + "command": "add", + "path": walkEntry.path, + "relativePath": relativePath, + }) + } +} + export async function uploadAction( options: CommandOptions, dataset_directory: string, @@ -139,19 +164,7 @@ export async function uploadAction( }) // Upload all files - for await ( - const walkEntry of walk(dataset_directory_abs, { - includeDirs: false, - includeSymlinks: false, - }) - ) { - const relativePath = relative(dataset_directory_abs, walkEntry.path) - worker.postMessage({ - "command": "add", - "path": walkEntry.path, - "relativePath": relativePath, - }) - } + await addGitFiles(worker, dataset_directory_abs) // Generate a commit worker.postMessage({ command: "commit" }) @@ -160,7 +173,7 @@ export async function uploadAction( worker.postMessage({ command: "push" }) // Close after all tasks are queued - worker.postMessage({ command: "close" }) + worker.postMessage({ command: "done" }) } /** diff --git 
a/cli/src/commands/validate.ts b/cli/src/commands/validate.ts deleted file mode 100644 index c77121f14..000000000 --- a/cli/src/commands/validate.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { Command } from "../deps.ts" - -export const validateCommand = new Command() - .name("bids-validator") - .description( - "This tool checks if a dataset in a given directory is compatible with the Brain Imaging Data Structure specification. To learn more about Brain Imaging Data Structure visit http://bids.neuroimaging.io", - ) - .arguments("") - .version("alpha") - .option("--json", "Output machine readable JSON") - .option( - "-s, --schema ", - "Specify a schema version to use for validation", - { - default: "latest", - }, - ) - .option("-v, --verbose", "Log more extensive information about issues") - .option( - "--ignoreNiftiHeaders", - "Disregard NIfTI header content during validation", - ) - .option( - "--filenameMode", - "Enable filename checks for newline separated filenames read from stdin", - ) - -export const validate = validateCommand diff --git a/cli/src/deps.ts b/cli/src/deps.ts index 1cc2ddcbc..14cf246d2 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -45,6 +45,7 @@ export { dirname } from "https://deno.land/std@0.212.0/path/dirname.ts" // Test suites export { assert, + assertArrayIncludes, assertEquals, assertObjectMatch, assertStrictEquals, @@ -61,3 +62,5 @@ export { default as ProgressBar } from "https://deno.land/x/progress@v1.3.9/mod. 
// Ignore library export { default as ignore } from "npm:ignore@5.3.0" export { encodeBase64 } from "https://deno.land/std@0.220.1/encoding/base64.ts" +// Isomorphic git +export { default as git, STAGE, TREE } from "npm:isomorphic-git@1.25.6" diff --git a/cli/src/worker/git.test.ts b/cli/src/worker/git.test.ts index 533cf10ae..39eace547 100644 --- a/cli/src/worker/git.test.ts +++ b/cli/src/worker/git.test.ts @@ -1,5 +1,7 @@ import { annexRelativePath, hashDirLower, hashDirMixed } from "./git.ts" -import { assertEquals } from "../deps.ts" +import { assertArrayIncludes, assertEquals, git, join, walk } from "../deps.ts" +import { addGitFiles } from "../commands/upload.ts" +import fs from "node:fs" Deno.test("annexRelativePath() returns appropriate paths", () => { assertEquals(annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz"), "../..") @@ -22,3 +24,127 @@ Deno.test("hashDirMixed() returns the correct key prefix", async () => { ["Xk", "Mx"], ) }) + +Deno.test("adds git and annexed content given a directory of files", async () => { + const testUpload = await Deno.makeTempDir() + const testRepo = await Deno.makeTempDir() + const testUrl = "https://example.com/repo.git" + + await git.init({ + fs, + dir: testRepo, + defaultBranch: "main", + }) + + const textEncoder = new TextEncoder() + + // Add .gitattributes directly here (add requires it) + await Deno.writeFile( + join(testRepo, ".gitattributes"), + textEncoder.encode(`* annex.backend=SHA256E +**/.git* annex.largefiles=nothing +*.bval annex.largefiles=nothing +*.bvec annex.largefiles=nothing +*.json annex.largefiles=largerthan=1mb +*.tsv annex.largefiles=largerthan=1mb +dataset_description.json annex.largefiles=nothing +.bidsignore annex.largefiles=nothing +CHANGES annex.largefiles=nothing +README* annex.largefiles=nothing +LICENSE annex.largefiles=nothing`), + ) + await git.add({ fs, dir: testRepo, filepath: ".gitattributes" }) + await git.commit({ + fs, + dir: testRepo, + author: { + name: "OpenNeuro", + email: 
"git@openneuro.org", + }, + message: "Test suite repo, please ignore", + }) + + // dataset_description.json + await Deno.writeFile( + join(testUpload, "dataset_description.json"), + textEncoder.encode(JSON.stringify({ + "Name": "Test Experiment", + "BIDSVersion": "1.8.0", + "DatasetType": "raw", + "License": "CC0", + "Authors": [ + "J. Doe", + "J. Doe", + ], + })), + ) + + // An annexed nifti file + const fakeNifti = new Uint8Array(65536) + crypto.getRandomValues(fakeNifti) + await Deno.mkdir(join(testUpload, "sub-01", "anat"), { recursive: true }) + await Deno.writeFile( + join(testUpload, "sub-01", "anat", "sub-01_T1w.nii.gz"), + fakeNifti, + ) + + // Create the git worker + const worker = new Worker(new URL("../worker/git.ts", import.meta.url).href, { + type: "module", + }) + + // Configure worker + worker.postMessage({ + "command": "setup", + "datasetId": "test_dataset", + "repoPath": testRepo, + "repoEndpoint": testUrl, + "authorization": + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJmNjZhNzRjNS05ZDhmLTQ2M2MtOGE2ZS1lYTE3ODljYTNiOTIiLCJlbWFpbCI6Im5lbGxAZGV2LW5lbGwuY29tIiwicHJvdmlkZXIiOiJnb29nbGUiLCJuYW1lIjoiTmVsbCBIYXJkY2FzdGxlIiwiYWRtaW4iOnRydWUsImlhdCI6MTcwMDUyNDIzNCwiZXhwIjoxNzMyMDYwMjM0fQ.5glc_uoxqcRJ4KWn2EvRR0hH-ono2MPJH0wqvcXBIOg", + "logLevel": "INFO", + }) + + await addGitFiles(worker, testUpload) + + // Setup a way to make sure the worker is finished + const closedPromise = new Promise((resolve) => { + worker.onmessage = (event) => { + if (event.data.command === "closed") { + resolve(true) + } + } + }) + + worker.postMessage({ command: "commit" }) + + // Close after all tasks are queued + worker.postMessage({ command: "done" }) + + // Wait until the worker says it's closed + await closedPromise + + const expectedFiles = [ + ".git/refs/heads/main", + ".git/config", + ".git/HEAD", + ".git/index", + ".gitattributes", + "dataset_description.json", + "sub-01/anat/sub-01_T1w.nii.gz", + ] + let gitObjects = 0 + for await ( + const walkEntry of 
walk(testRepo, { + includeDirs: false, + includeSymlinks: true, + }) + ) { + const relativePath = walkEntry.path.split(testRepo + "/")[1] + if (relativePath.startsWith(".git/objects/")) { + gitObjects += 1 + } else { + assertArrayIncludes(expectedFiles, [relativePath]) + } + } + assertEquals(gitObjects, 9) +}) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index de30bf4d3..1059998f5 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -1,4 +1,4 @@ -import git, { STAGE, TREE } from "npm:isomorphic-git@1.25.3" +import { git, STAGE, TREE } from "../deps.ts" import http from "npm:isomorphic-git@1.25.3/http/node/index.js" import fs from "node:fs" import { decode } from "https://deno.land/x/djwt@v3.0.1/mod.ts" @@ -80,6 +80,11 @@ let attributesCache: GitAnnexAttributes const annexKeys: Record = {} async function done() { + logger.info("Git worker shutdown.") + // @ts-ignore + await globalThis.postMessage({ + command: "closed", + }) await globalThis.close() } @@ -137,7 +142,7 @@ async function getGitAttributes(): Promise { logger.error( "Dataset repository is missing .gitattributes and may be improperly initialized.", ) - globalThis.close() + await done() } } return attributesCache @@ -329,7 +334,7 @@ async function commit() { const Aoid = await A?.oid() const Boid = await B?.oid() let type = "equal" - if (Aoid !== Boid) { + if (Aoid !== Boid && Aoid !== undefined && Boid !== undefined) { logger.info(`modified:\t${filepath}`) type = "modify" } @@ -363,7 +368,7 @@ async function commit() { logger.info(`Committed as "${commitHash}"`) } else { console.log("No changes found, not uploading.") - self.close() + workQueue.enqueue(done) } } From 738314938e17bfcbaed427329d3c929fab772cd0 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 18 Mar 2024 16:18:31 -0700 Subject: [PATCH 19/35] docs(cli): Add readme with usage information and tips --- cli/README.md | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 
deletion(-) diff --git a/cli/README.md b/cli/README.md index 61af9bc9b..c066f4608 100644 --- a/cli/README.md +++ b/cli/README.md @@ -1,3 +1,45 @@ # OpenNeuro CLI for Deno -Simplified CLI for OpenNeuro implemented in Deno. Deno eliminates the need to install the CLI and allows for more code reuse with OpenNeuro's web frontend. +Command line tools for OpenNeuro implemented in Deno. Deno eliminates the need to install the CLI and allows for more code reuse with OpenNeuro's web frontend. + +## Install + +Download deno via [any supported installation method](https://docs.deno.com/runtime/manual/getting_started/installation). + +## Usage + +OpenNeuro CLI will validate your dataset with the [bids-validator](https://github.com/bids-standard/bids-validator/) and then allow you to upload to OpenNeuro. If you wish to make changes to a dataset, the CLI can download, allow you to make local changes, and reupload only the changes to OpenNeuro. + +### Login + +To upload or download data from OpenNeuro, login with your account. + +```shell +# Run login and follow the prompts +deno run -A cli/openneuro.ts login +``` + +You can also create an API key on [OpenNeuro](https://openneuro.org/keygen) and specify this as an option or environment variable. + +```shell +# For scripts +export OPENNEURO_TOKEN= +deno run -A cli/openneuro.ts login --error-reporting true +``` + +### Uploading + +```shell +# Path to the dataset root (directory containing dataset_description.json) +deno run -A cli/openneuro.ts upload --affirmDefaced path/to/dataset +``` + +```shell +# To debug issues - enable logging and provide this log to support or open a GitHub issue +export OPENNEURO_LOG=INFO +deno run -A cli/openneuro.ts upload --affirmDefaced path/to/dataset +``` + +## Implementation Notes + +This tool uses isomorphic git to download, modify, and push datasets using OpenNeuro's [git interface](https://docs.openneuro.org/git.html).
Other tools that support git and git-annex repositories such as [DataLad](https://www.datalad.org/) can also be used with the local copy. From 4f48ee1a12135efd9404a532c6e55d1cfb4e221b Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 18 Mar 2024 16:28:15 -0700 Subject: [PATCH 20/35] feat(cli): Allow configuration of API key with OPENNEURO_API_KEY environment variable --- cli/README.md | 2 +- cli/src/commands/login.ts | 19 +++++++++++++++---- cli/src/options.ts | 7 ++++--- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cli/README.md b/cli/README.md index c066f4608..423658a50 100644 --- a/cli/README.md +++ b/cli/README.md @@ -23,7 +23,7 @@ You can also create an API key on [OpenNeuro](https://openneuro.org/keygen) and ```shell # For scripts -export OPENNEURO_TOKEN= +export OPENNEURO_API_KEY= deno run -A cli/openneuro.ts login --error-reporting true ``` diff --git a/cli/src/commands/login.ts b/cli/src/commands/login.ts index d94b8ae54..290c3f190 100644 --- a/cli/src/commands/login.ts +++ b/cli/src/commands/login.ts @@ -22,11 +22,22 @@ export async function loginAction(options: CommandOptions) { ], }) localStorage.setItem("url", url) - const token = options.token ? options.token : await Secret.prompt( - `Enter your API key for OpenNeuro (get an API key from ${url}/keygen).`, - ) + let token + // Environment variable + if (options.openneuroApiKey) { + token = options.openneuroApiKey + } + // Command line + if (options.token) { + token = options.token + } + if (!token) { + token = await Secret.prompt( + `Enter your API key for OpenNeuro (get an API key from ${url}/keygen).`, + ) + } localStorage.setItem("token", token) - const errorReporting = options.hasOwnProperty("errorReporting") + const errorReporting = Object.hasOwn(options, "errorReporting") ? 
options.errorReporting : await Confirm.prompt(messages.errorReporting) localStorage.setItem("errorReporting", errorReporting.toString()) diff --git a/cli/src/options.ts b/cli/src/options.ts index 67d59949a..7e6a42372 100644 --- a/cli/src/options.ts +++ b/cli/src/options.ts @@ -26,10 +26,11 @@ const openneuroCommand = new Command() // TODO - Sync this with the node packages .version("4.20.4") .globalType("debugLevel", new EnumType(LogLevelNames)) - .globalEnv("LOG=", "Enable debug output.") - .globalAction(({ log }) => { - setupLogging(log ? log : "ERROR") + .globalEnv("OPENNEURO_LOG=", "Enable debug output.") + .globalAction(({ openneuroLog }) => { + setupLogging(openneuroLog ? openneuroLog : "ERROR") }) + .globalEnv("OPENNEURO_API_KEY=", "Specify an OpenNeuro API key.") .command("login", login) .command("download", download) .command("upload", upload) From 4ec7f89b74473048cbe039d59613b2e959a2a49a Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 18 Mar 2024 16:45:20 -0700 Subject: [PATCH 21/35] fix(cli): Provide better feedback during uploads. 
--- cli/src/commands/upload.ts | 7 ------- cli/src/worker/git.ts | 10 ++++++++++ cli/src/worker/transferKey.ts | 9 ++++++++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index f60411d23..2de2b9401 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -151,13 +151,6 @@ export async function uploadAction( "logLevel": logger.levelName, }) - /* - const progressBar = new ProgressBar({ - title: "Upload", - total: 100, - }) - progressBar.render(0)*/ - logger.info(`Repo path: ${join(repoDir, datasetId)}`) worker.postMessage({ "command": "clone", diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 1059998f5..54d8f9092 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -376,6 +376,14 @@ async function commit() { * `git push` and `git-annex copy --to=openneuro` */ async function push() { + const annexedObjects = Object.keys(annexKeys).length + if (annexedObjects > 0) { + console.log( + `Transferring ${annexedObjects} annexed file${ + annexedObjects === 1 ? 
"" : "s" + }.`, + ) + } // Git-annex copy --to=openneuro for (const [key, path] of Object.entries(annexKeys)) { const checkKeyResult = await checkKey({ @@ -402,10 +410,12 @@ async function push() { } } } + console.log("Pushing changes...") // Git push await git.push( gitOptions(context.repoPath), ) + console.log("Upload complete.") } // Queue of tasks to perform in order diff --git a/cli/src/worker/transferKey.ts b/cli/src/worker/transferKey.ts index c31a5a18a..773826756 100644 --- a/cli/src/worker/transferKey.ts +++ b/cli/src/worker/transferKey.ts @@ -1,3 +1,4 @@ +import { logger } from "../logger.ts" import { encodeBase64 } from "../deps.ts" /** Deno port of transferKey from Node.js CLI */ @@ -71,7 +72,13 @@ export async function storeKey( return -1 } } finally { - fileHandle?.close() + try { + fileHandle?.close() + } catch (err) { + if (err.name !== "BadResource") { + logger.error(err) + } + } } } From 5433a2ebd5e9e58f445960060e605a79f5b28cc2 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 19 Mar 2024 09:10:24 -0700 Subject: [PATCH 22/35] fix(cli): Add a retry for annex key transfer (three attempts) --- cli/src/worker/git.ts | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 54d8f9092..21b79f2c9 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -393,16 +393,26 @@ async function push() { if (checkKeyResult) { logger.info(`Skipping key "${key}" present on remote`) } else { - const storeKeyResult = await storeKey( - { - url: context.repoEndpoint, - token: context.authorization, - }, - key, - path, - ) + let storeKeyResult = -1 + let retries = 3 + while (storeKeyResult === -1 && retries > 0) { + retries -= 1 + storeKeyResult = await storeKey( + { + url: context.repoEndpoint, + token: context.authorization, + }, + key, + path, + ) + if (storeKeyResult === -1 && retries > 0) { + logger.warn(`Failed to transfer annex object "${key}" - retrying`) + 
} + } if (storeKeyResult === -1) { - logger.error(`Failed to transfer annex object "${key}"`) + logger.error( + `Failed to transfer annex object "${key}" after ${retries} attempts`, + ) } else { logger.info( `Stored ${storeKeyResult} bytes for key "${key}" from path "${path}"`, From 16b502306c041877a763eb24df0691b8210578e7 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 19 Mar 2024 09:39:28 -0700 Subject: [PATCH 23/35] fix(cli): Prevent failure to exit on downloads and add output --- cli/src/commands/download.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cli/src/commands/download.ts b/cli/src/commands/download.ts index 1281a0073..a467babf0 100644 --- a/cli/src/commands/download.ts +++ b/cli/src/commands/download.ts @@ -41,10 +41,14 @@ export async function downloadAction( "logLevel": logger.levelName, }) + console.log("Downloading...") + worker.postMessage({ "command": "clone", }) // Close after all tasks are queued - worker.postMessage({ command: "close" }) + worker.postMessage({ command: "done" }) + + console.log("Complete!") } From 23180e1e073e252d9f6af264562116fd9bffe0a4 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 19 Mar 2024 09:40:08 -0700 Subject: [PATCH 24/35] feat(cli): Add upload progress for annex objects --- cli/src/worker/git.ts | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 21b79f2c9..27e43d140 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -12,6 +12,7 @@ import { basename, dirname, join, LevelName, relative } from "../deps.ts" import { logger, setupLogging } from "../logger.ts" import { PromiseQueue } from "./queue.ts" import { checkKey, storeKey } from "./transferKey.ts" +import { ProgressBar } from "../deps.ts" /** * Why are we using hash wasm over web crypto? 
@@ -376,13 +377,14 @@ async function commit() { * `git push` and `git-annex copy --to=openneuro` */ async function push() { + let completed = 0 const annexedObjects = Object.keys(annexKeys).length + const progress = new ProgressBar({ + title: `Transferring annexed files`, + total: annexedObjects, + }) if (annexedObjects > 0) { - console.log( - `Transferring ${annexedObjects} annexed file${ - annexedObjects === 1 ? "" : "s" - }.`, - ) + await progress.render(completed) } // Git-annex copy --to=openneuro for (const [key, path] of Object.entries(annexKeys)) { @@ -414,6 +416,8 @@ async function push() { `Failed to transfer annex object "${key}" after ${retries} attempts`, ) } else { + completed += 1 + await progress.render(completed) logger.info( `Stored ${storeKeyResult} bytes for key "${key}" from path "${path}"`, ) From 388024ac7bc708864264fb0092db4a88efb846dd Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 19 Mar 2024 09:54:43 -0700 Subject: [PATCH 25/35] fix(cli): Show URL for dataset after uploading. 
--- cli/src/worker/git.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 27e43d140..97d6dc460 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -429,7 +429,8 @@ async function push() { await git.push( gitOptions(context.repoPath), ) - console.log("Upload complete.") + const url = new URL(context.repoEndpoint) + console.log(`Upload complete, visit your dataset at ${url.protocol}//${url.host}/datasets/${context.datasetId}`) } // Queue of tasks to perform in order From d46f1bc2144931742636cd0888c634737692ea75 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 19 Mar 2024 09:58:11 -0700 Subject: [PATCH 26/35] fix(cli): Skip dotfiles in uploads (except .bidsignore and .gitattributes) --- cli/src/commands/upload.ts | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 2de2b9401..0f89307a9 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -8,7 +8,6 @@ import { logger } from "../logger.ts" import { Confirm, join, - ProgressBar, prompt, relative, resolve, @@ -48,11 +47,15 @@ export async function addGitFiles( }) ) { const relativePath = relative(dataset_directory_abs, walkEntry.path) - worker.postMessage({ - "command": "add", - "path": walkEntry.path, - "relativePath": relativePath, - }) + if (relativePath === ".bidsignore" || relativePath === ".gitattributes" || !relativePath.startsWith(".")) { + worker.postMessage({ + "command": "add", + "path": walkEntry.path, + "relativePath": relativePath, + }) + } else { + logger.warn(`Skipped file "${relativePath}"`) + } } } From aa239727c6d6666d49df280c02fffd1035ee070c Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 19 Mar 2024 14:02:32 -0700 Subject: [PATCH 27/35] fix(cli): Avoid creating hash-wasm objects on every file --- cli/src/worker/git.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff 
--git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 97d6dc460..1553e740c 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -210,6 +210,9 @@ export async function hashDirMixed( return [`${letters[1]}${letters[0]}`, `${letters[3]}${letters[2]}`] } +const computeHashMD5 = await createMD5() +const computeHashSHA256 = await createSHA256() + /** * git-annex add equivalent */ @@ -239,8 +242,8 @@ async function add(event: GitWorkerEventAdd) { } // Compute hash const computeHash = annexed.startsWith("MD5") - ? await createMD5() - : await createSHA256() + ? computeHashMD5 + : computeHashSHA256 computeHash.init() const stream = fs.createReadStream(event.data.path, { highWaterMark: 1024 * 1024 * 10, @@ -430,7 +433,9 @@ async function push() { gitOptions(context.repoPath), ) const url = new URL(context.repoEndpoint) - console.log(`Upload complete, visit your dataset at ${url.protocol}//${url.host}/datasets/${context.datasetId}`) + console.log( + `Upload complete, visit your dataset at ${url.protocol}//${url.host}/datasets/${context.datasetId}`, + ) } // Queue of tasks to perform in order From 1d0e3ff7b05abe18aae0be5c39d9c35da6d416f8 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 1 Apr 2024 10:38:45 -0700 Subject: [PATCH 28/35] fix(cli): Add message describing how to get annexed files --- cli/src/commands/download.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/commands/download.ts b/cli/src/commands/download.ts index a467babf0..eb0b93427 100644 --- a/cli/src/commands/download.ts +++ b/cli/src/commands/download.ts @@ -50,5 +50,5 @@ export async function downloadAction( // Close after all tasks are queued worker.postMessage({ command: "done" }) - console.log("Complete!") + console.log("Download complete. 
To download all data files, use `datalad get` or `git-annex get`.") } From 6182354b141cc6ca46d3607f02ee49d8eae618ea Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 1 Apr 2024 10:53:30 -0700 Subject: [PATCH 29/35] fix(cli): Prevent mishandling of parent directories for git objects --- cli/src/worker/git.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts index 1553e740c..0fd8f7e89 100644 --- a/cli/src/worker/git.ts +++ b/cli/src/worker/git.ts @@ -229,6 +229,8 @@ async function add(event: GitWorkerEventAdd) { filepath: event.data.relativePath, } const targetPath = join(context.repoPath, event.data.relativePath) + // Verify parent directories exist + await fs.promises.mkdir(dirname(targetPath), { recursive: true }) // Copy non-annexed files for git index creation await fs.promises.copyFile(event.data.path, targetPath) await git.add(options) From 2ea7ebb16008c8f2f996a6611b4b62c0ebdeaacb Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 1 Apr 2024 11:04:30 -0700 Subject: [PATCH 30/35] fix(cli): Add deno tests to CI --- .github/workflows/deno.yml | 35 +++++++++++++++++++++++++++++++++++ .gitignore | 2 ++ deno.json | 3 ++- 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/deno.yml diff --git a/.github/workflows/deno.yml b/.github/workflows/deno.yml new file mode 100644 index 000000000..02e026c64 --- /dev/null +++ b/.github/workflows/deno.yml @@ -0,0 +1,35 @@ +--- +name: Deno build + +on: + push: + branches: [master] + tags: ['*'] + pull_request: + branches: [master] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ${{ matrix.os }} + timeout-minutes: 5 + strategy: + matrix: + os: [ubuntu-22.04, macos-12, windows-2022] + fail-fast: false + + steps: + - uses: actions/checkout@v4 + - uses: denoland/setup-deno@v1.1.2 + with: + deno-version: v1.x + - name: Collect coverage + run: deno task coverage + if: 
${{ always() }} + - uses: codecov/codecov-action@v4 + if: ${{ always() }} + with: + files: coverage.lcov diff --git a/.gitignore b/.gitignore index d48f05a4e..bd385a876 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ dist !.yarn/versions .*.sw[po] .venv/ +coverage +coverage.lcov diff --git a/deno.json b/deno.json index 54553baa5..dc08c1f6c 100644 --- a/deno.json +++ b/deno.json @@ -14,6 +14,7 @@ ] }, "tasks": { - "tests": "deno test --allow-read --allow-write cli/" + "tests": "deno test --allow-read --allow-write cli/", + "coverage": "deno test --allow-read --allow-write --coverage cli/ && deno coverage ./coverage --lcov > coverage.lcov" } } From b47af16b6d503aac336b804c74a40bde09f21b54 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 1 Apr 2024 11:09:51 -0700 Subject: [PATCH 31/35] tests(cli): Fix test case on Windows with hardcoded path --- cli/src/deps.ts | 1 + cli/src/worker/git.test.ts | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cli/src/deps.ts b/cli/src/deps.ts index 14cf246d2..2e5f34b5f 100644 --- a/cli/src/deps.ts +++ b/cli/src/deps.ts @@ -42,6 +42,7 @@ export { join } from "https://deno.land/std@0.212.0/path/join.ts" export { extname } from "https://deno.land/std@0.212.0/path/extname.ts" export { basename } from "https://deno.land/std@0.212.0/path/basename.ts" export { dirname } from "https://deno.land/std@0.212.0/path/dirname.ts" +export { SEPARATOR } from "https://deno.land/std@0.221.0/path/mod.ts" // Test suites export { assert, diff --git a/cli/src/worker/git.test.ts b/cli/src/worker/git.test.ts index 39eace547..9182d1994 100644 --- a/cli/src/worker/git.test.ts +++ b/cli/src/worker/git.test.ts @@ -1,10 +1,10 @@ import { annexRelativePath, hashDirLower, hashDirMixed } from "./git.ts" -import { assertArrayIncludes, assertEquals, git, join, walk } from "../deps.ts" +import { assertArrayIncludes, assertEquals, git, join, walk, SEPARATOR } from "../deps.ts" import { addGitFiles } from 
"../commands/upload.ts" import fs from "node:fs" Deno.test("annexRelativePath() returns appropriate paths", () => { - assertEquals(annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz"), "../..") + assertEquals(annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz"), join('..', '..')) }) Deno.test("hashDirLower() returns the correct key prefix", async () => { @@ -124,13 +124,13 @@ LICENSE annex.largefiles=nothing`), await closedPromise const expectedFiles = [ - ".git/refs/heads/main", - ".git/config", - ".git/HEAD", - ".git/index", + join(".git", "refs", "heads", "main"), + join(".git", "config"), + join(".git", "HEAD"), + join(".git", "index"), ".gitattributes", "dataset_description.json", - "sub-01/anat/sub-01_T1w.nii.gz", + join("sub-01", "anat", "sub-01_T1w.nii.gz") ] let gitObjects = 0 for await ( @@ -139,8 +139,8 @@ LICENSE annex.largefiles=nothing`), includeSymlinks: true, }) ) { - const relativePath = walkEntry.path.split(testRepo + "/")[1] - if (relativePath.startsWith(".git/objects/")) { + const relativePath = walkEntry.path.split(testRepo + SEPARATOR)[1] + if (relativePath.startsWith(`.git${SEPARATOR}objects${SEPARATOR}`)) { gitObjects += 1 } else { assertArrayIncludes(expectedFiles, [relativePath]) From feae4c8da60765a25a523dd30d59dab501e2ae36 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 2 Apr 2024 07:18:41 -0700 Subject: [PATCH 32/35] fix(cli): Improve error handling for git-credential usage --- cli/src/commands/git-credential.ts | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cli/src/commands/git-credential.ts b/cli/src/commands/git-credential.ts index 8fd178dbe..bf71c6fd6 100644 --- a/cli/src/commands/git-credential.ts +++ b/cli/src/commands/git-credential.ts @@ -25,10 +25,14 @@ export async function getRepoAccess(datasetId?: string) { }, }), }) - const { data } = await req.json() - return { - token: data.prepareRepoAccess.token, // Short lived repo access token - endpoint: data.prepareRepoAccess.endpoint, + 
const response = await req.json() + if (response.errors) { + throw Error(response.errors.map(error => error.message)) + } else { + return { + token: response.data.prepareRepoAccess.token, // Short lived repo access token + endpoint: response.data.prepareRepoAccess.endpoint, + } } } @@ -72,6 +76,7 @@ export const gitCredential = new Command() .description( "A git credentials helper for easier datalad or git-annex access to datasets.", ) - .action(() => { - console.log(gitCredentialAction()) + .command("fill") + .action(async () => { + console.log(await gitCredentialAction()) }) From bcdb318ce4649811710d46cfecd61347cbdbec5e Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 2 Apr 2024 07:24:12 -0700 Subject: [PATCH 33/35] fix(cli): Add types for GraphQL Errors --- cli/src/commands/git-credential.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cli/src/commands/git-credential.ts b/cli/src/commands/git-credential.ts index bf71c6fd6..b32d5822d 100644 --- a/cli/src/commands/git-credential.ts +++ b/cli/src/commands/git-credential.ts @@ -10,6 +10,16 @@ const prepareRepoAccess = ` } ` +interface GraphQLError { + message: string + locations: { line: number, column: number }[] + path: string[] + extensions: { + code: string, + stacktrace: string[] + } +} + export async function getRepoAccess(datasetId?: string) { const config = getConfig() const req = await fetch(`${config.url}/crn/graphql`, { @@ -27,7 +37,8 @@ export async function getRepoAccess(datasetId?: string) { }) const response = await req.json() if (response.errors) { - throw Error(response.errors.map(error => error.message)) + const errors: GraphQLError[] = response.errors + throw Error(errors.map(error => error.message).toString()) } else { return { token: response.data.prepareRepoAccess.token, // Short lived repo access token From 242aca9e4012b72e54f9a0cb1b524a0520ecde79 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 2 Apr 2024 07:34:50 -0700 Subject: [PATCH 
34/35] chore: Cleanup unused tusd related code --- cli/src/commands/upload.test.ts | 0 docker-compose.yml | 15 ----- nginx/nginx.dev.conf | 19 ------- .../src/handlers/__tests__/tusd.spec.ts | 15 ----- .../openneuro-server/src/handlers/tusd.ts | 56 ------------------- packages/openneuro-server/src/routes.ts | 8 --- 6 files changed, 113 deletions(-) delete mode 100644 cli/src/commands/upload.test.ts delete mode 100644 packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts delete mode 100644 packages/openneuro-server/src/handlers/tusd.ts diff --git a/cli/src/commands/upload.test.ts b/cli/src/commands/upload.test.ts deleted file mode 100644 index e69de29bb..000000000 diff --git a/docker-compose.yml b/docker-compose.yml index bff65bd93..63247b07e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -115,21 +115,6 @@ services: aliases: - datalad-0 - datalad-1 - - # Upload daemon - tusd: - image: docker.io/tusproject/tusd:v2.1.0 - user: root - command: - [ - '--base-path=/tusd/files/', - '--hooks-http=http://server:8111/api/tusd', - '--behind-proxy', - '--hooks-http-forward-headers=authorization', - '--upload-dir=/datasets/tusd' - ] - volumes: - - ${PERSISTENT_DIR}/datalad/tusd:/datasets/tusd:z # nginx + app web: diff --git a/nginx/nginx.dev.conf b/nginx/nginx.dev.conf index 0ce346a55..c14087c91 100644 --- a/nginx/nginx.dev.conf +++ b/nginx/nginx.dev.conf @@ -46,25 +46,6 @@ server { proxy_pass http://server:8111; } - location /tusd { - # Forward incoming requests to local tusd instance - proxy_pass http://tusd:8080; - - # Disable request and response buffering - proxy_request_buffering off; - proxy_buffering off; - proxy_http_version 1.1; - - # Add X-Forwarded-* headers - proxy_set_header X-Forwarded-Host $host:9876; - proxy_set_header X-Forwarded-Proto $scheme; - - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - client_max_body_size 0; - } - - # Sitemap path location /sitemap.xml { client_max_body_size 0; diff --git 
a/packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts b/packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts deleted file mode 100644 index 8e51185dc..000000000 --- a/packages/openneuro-server/src/handlers/__tests__/tusd.spec.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { vi } from "vitest" -import { acceptUpload } from "../tusd" - -vi.mock("../../config.ts") - -describe("tusd handler", () => { - it("generates the expected upload ID", () => { - const accepted = acceptUpload("ds000001", "1234-5678", "path") - expect(accepted.ChangeFileInfo.ID).toMatch( - /^ds[0-9]{6}\/[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}$/, - ) - expect(accepted.ChangeFileInfo.MetaData.datasetId).toEqual("ds000001") - expect(accepted.ChangeFileInfo.MetaData.uploaderId).toEqual("1234-5678") - }) -}) diff --git a/packages/openneuro-server/src/handlers/tusd.ts b/packages/openneuro-server/src/handlers/tusd.ts deleted file mode 100644 index 569c74da9..000000000 --- a/packages/openneuro-server/src/handlers/tusd.ts +++ /dev/null @@ -1,56 +0,0 @@ -import { checkDatasetWrite } from "../graphql/permissions" - -export function acceptUpload( - datasetId: string, - uploaderId: string, - path: string, -) { - return { - "ChangeFileInfo": { - ID: `${datasetId}:${uploaderId}:${path.replaceAll("/", ":")}`, - MetaData: { - datasetId, - uploaderId, - }, - }, - "RejectUpload": false, - } -} - -export const rejectUpload = { - "HTTPResponse": { - StatusCode: 403, - Body: '{"message":"access denied to dataset"}', - Header: { - "Content-Type": "application/json", - }, - }, - "RejectUpload": true, -} - -export const tusdHandler = (req, res, next) => { - try { - const userId = req.user.id - const userInfo = { - id: userId, - admin: req.user.admin, - } - if (req.body.Type === "pre-create") { - try { - const datasetId = req.body.Event.Upload.MetaData.datasetId - if (checkDatasetWrite(datasetId, userId, userInfo)) { - const path = 
req.body.Event.Upload.MetaData.relativePath - res.json(acceptUpload(datasetId, userId, path)) - } else { - res.json(rejectUpload) - } - } catch (_err) { - res.status(400) - res.send("`datasetId` MetaData parameter is required to upload") - } - } - } catch (err) { - res.status(401) - next(err) - } -} diff --git a/packages/openneuro-server/src/routes.ts b/packages/openneuro-server/src/routes.ts index 3f455c4fb..5e9a23132 100644 --- a/packages/openneuro-server/src/routes.ts +++ b/packages/openneuro-server/src/routes.ts @@ -14,7 +14,6 @@ import * as auth from "./libs/authentication/states" import * as doi from "./handlers/doi" import { sitemapHandler } from "./handlers/sitemap" import { reviewerHandler } from "./handlers/reviewer" -import { tusdHandler } from "./handlers/tusd" const noCache = (req, res, next) => { res.setHeader("Surrogate-Control", "no-store") @@ -171,13 +170,6 @@ const routes = [ url: "/sitemap", handler: sitemapHandler, }, - // tusd upload hooks - { - method: "post", - url: "/tusd", - middleware: [noCache, jwt.authenticate, auth.authenticated], - handler: tusdHandler, - }, ] // initialize routes ------------------------------- From ba3e27d20c31e5b078efeeeed7b695697da63749 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Tue, 2 Apr 2024 07:52:31 -0700 Subject: [PATCH 35/35] chore: Ignore deno tests in NodeJS test suite --- vite.config.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/vite.config.ts b/vite.config.ts index 9c7ef97e2..64b2d64a4 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -9,5 +9,6 @@ export default defineConfig({ clearMocks: true, maxConcurrency: 16, isolate: true, + exclude: ["./cli", "./.yarn"], }, })