diff --git a/.github/workflows/deno.yml b/.github/workflows/deno.yml new file mode 100644 index 000000000..02e026c64 --- /dev/null +++ b/.github/workflows/deno.yml @@ -0,0 +1,35 @@ +--- +name: Deno build + +on: + push: + branches: [master] + tags: ['*'] + pull_request: + branches: [master] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ${{ matrix.os }} + timeout-minutes: 5 + strategy: + matrix: + os: [ubuntu-22.04, macos-12, windows-2022] + fail-fast: false + + steps: + - uses: actions/checkout@v4 + - uses: denoland/setup-deno@v1.1.2 + with: + deno-version: v1.x + - name: Collect coverage + run: deno task coverage + if: ${{ always() }} + - uses: codecov/codecov-action@v4 + if: ${{ always() }} + with: + files: coverage.lcov diff --git a/.gitignore b/.gitignore index d48f05a4e..bd385a876 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ dist !.yarn/versions .*.sw[po] .venv/ +coverage +coverage.lcov diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 000000000..423658a50 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,45 @@ +# OpenNeuro CLI for Deno + +Command line tools for OpenNeuro implemented in Deno. Deno eliminates the need to install the CLI and allows for more code reuse with OpenNeuro's web frontend. + +## Install + +Download Deno via [any supported installation method](https://docs.deno.com/runtime/manual/getting_started/installation). + +## Usage + +OpenNeuro CLI will validate your dataset with the [bids-validator](https://github.com/bids-standard/bids-validator/) and then allow you to upload it to OpenNeuro. If you wish to make changes to a dataset, the CLI can download it, let you make local changes, and reupload only the changes to OpenNeuro. + +### Login + +To upload or download data from OpenNeuro, log in with your account. + +```shell +# Run login and follow the prompts +deno run -A cli/openneuro.ts login +``` + +You can also create an API key on [OpenNeuro](https://openneuro.org/keygen) and specify this as an option or environment variable. + +```shell +# For scripts +export OPENNEURO_API_KEY=<your_api_key> +deno run -A cli/openneuro.ts login --error-reporting true +``` + +### Uploading + +```shell +# Path to the dataset root (directory containing dataset_description.json) +deno run -A cli/openneuro.ts upload --affirmDefaced path/to/dataset +``` + +```shell +# To debug issues - enable logging and provide this log to support or open a GitHub issue +export OPENNEURO_LOG=INFO +deno run -A cli/openneuro.ts upload --affirmDefaced path/to/dataset +``` + +## Implementation Notes + +This tool uses isomorphic-git to download, modify, and push datasets using OpenNeuro's [git interface](https://docs.openneuro.org/git.html). Other tools that support git and git-annex repositories, such as [DataLad](https://www.datalad.org/), can also be used with the local copy. 
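+ +### Downloading + +```shell +# Download the latest snapshot of ds000001 into a new directory +deno run -A cli/openneuro.ts download ds000001 path/to/output +``` + +After downloading, run `datalad get` or `git-annex get` inside the resulting directory to fetch the annexed data files. The `git-credential fill` subcommand can supply short-lived credentials to those tools. A minimal sketch of invoking it directly (git normally pipes this credential description in itself, and the `path` value here is illustrative): + +```shell +printf 'protocol=https\nhost=openneuro.org\npath=/datasets/ds000001\n' | \ + deno run -A cli/openneuro.ts git-credential fill +# Prints username=@openneuro/cli and password=<a short-lived token> +```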
diff --git a/cli/openneuro.ts b/cli/openneuro.ts new file mode 100644 index 000000000..168c7eb68 --- /dev/null +++ b/cli/openneuro.ts @@ -0,0 +1,10 @@ +/** + * Entrypoint for OpenNeuro CLI + */ +import { commandLine } from "./src/options.ts" + +export async function main() { + await commandLine(Deno.args) +} + +await main() diff --git a/cli/src/bids_validator.ts b/cli/src/bids_validator.ts new file mode 100644 index 000000000..24becc495 --- /dev/null +++ b/cli/src/bids_validator.ts @@ -0,0 +1,5 @@ +// TODO - Switch to upstream after next release +export { validateCommand } from "https://raw.githubusercontent.com/bids-standard/bids-validator/master/bids-validator/src/setup/options.ts" +export { validate } from "https://deno.land/x/bids_validator@v1.14.0/main.ts" +export { readFileTree } from "https://deno.land/x/bids_validator@v1.14.0/files/deno.ts" +export { consoleFormat } from "https://deno.land/x/bids_validator@v1.14.0/utils/output.ts" diff --git a/cli/src/commands/download.ts b/cli/src/commands/download.ts new file mode 100644 index 000000000..eb0b93427 --- /dev/null +++ b/cli/src/commands/download.ts @@ -0,0 +1,54 @@ +import { Command } from "../deps.ts" +import type { CommandOptions } from "../deps.ts" +import { readConfig } from "../config.ts" +import { logger } from "../logger.ts" +import { getRepoAccess } from "./git-credential.ts" + +export const download = new Command() + .name("download") + .description("Download a dataset from OpenNeuro") + .arguments("<accession_number> <download_directory>") + .option( + "-d, --draft", + "Download a draft instead of the latest version snapshot.", + ) + .option( + "-v, --version <version:string>", + "Download a specific version.", + ) + .action(downloadAction) + +export async function downloadAction( + options: CommandOptions, + accession_number: string, + download_directory: string, +) { + const datasetId = accession_number + const clientConfig = readConfig() + const { token, endpoint } = await getRepoAccess(datasetId) + + // Create the git worker + const worker = new Worker(new URL("../worker/git.ts", import.meta.url).href, { + type: "module", + }) + + // Configure worker + worker.postMessage({ + "command": "setup", + "datasetId": datasetId, + "repoPath": download_directory, + "repoEndpoint": `${clientConfig.url}/git/${endpoint}/${datasetId}`, + "authorization": token, + "logLevel": logger.levelName, + }) + + console.log("Downloading...") + + worker.postMessage({ + "command": "clone", + }) + + // Close after all tasks are queued + worker.postMessage({ command: "done" }) + + console.log("Download complete. 
To download all data files, use `datalad get` or `git-annex get`.") +} diff --git a/cli/src/commands/git-credential.test.ts b/cli/src/commands/git-credential.test.ts new file mode 100644 index 000000000..5f2117b16 --- /dev/null +++ b/cli/src/commands/git-credential.test.ts @@ -0,0 +1,20 @@ +import { assertEquals } from "../deps.ts" +import { gitCredentialAction } from "./git-credential.ts" + +Deno.test("git-credential parses stdin correctly", async () => { + const stdin = new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + "host=staging.openneuro.org\nprotocol=https\npath=/datasets/ds000001\n", + ), + ) + controller.close() + }, + }) + const output = await gitCredentialAction( + stdin, + async () => ({ token: "token", endpoint: 2 }), + ) + assertEquals(output, "username=@openneuro/cli\npassword=token\n") +}) diff --git a/cli/src/commands/git-credential.ts b/cli/src/commands/git-credential.ts new file mode 100644 index 000000000..b32d5822d --- /dev/null +++ b/cli/src/commands/git-credential.ts @@ -0,0 +1,93 @@ +import { Command, TextLineStream } from "../deps.ts" +import { getConfig } from "../config.ts" + +const prepareRepoAccess = ` + mutation prepareRepoAccess($datasetId: ID!) { + prepareRepoAccess(datasetId: $datasetId) { + token + endpoint + } + } +` + +interface GraphQLError { + message: string + locations: { line: number, column: number }[] + path: string[] + extensions: { + code: string, + stacktrace: string[] + } +} + +export async function getRepoAccess(datasetId?: string) { + const config = getConfig() + const req = await fetch(`${config.url}/crn/graphql`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${config.token}`, // Long lived token + }, + body: JSON.stringify({ + query: prepareRepoAccess, + variables: { + datasetId, + }, + }), + }) + const response = await req.json() + if (response.errors) { + const errors: GraphQLError[] = response.errors + throw Error(errors.map(error => error.message).toString()) + } else { + return { + token: response.data.prepareRepoAccess.token, // Short lived repo access token + endpoint: response.data.prepareRepoAccess.endpoint, + } + } +} + +/** + * Provide a git-credential helper for OpenNeuro + */ +export async function gitCredentialAction( + stdinReadable: ReadableStream<Uint8Array> = Deno.stdin.readable, + tokenGetter = getRepoAccess, +) { + let pipeOutput = "" + const credential: Record<string, string> = {} + // Create a stream of lines from stdin + const lineStream = stdinReadable + .pipeThrough(new TextDecoderStream()) + .pipeThrough(new TextLineStream()) + for await (const line of lineStream) { + const [key, value] = line.split("=", 2) + credential[key] = value + } + if ("path" in credential && credential.path) { + const datasetId = credential.path.split("/").pop() + const { token } = await tokenGetter(datasetId) + const output: Record<string, string> = { + username: "@openneuro/cli", + password: token, + } + for (const key in output) { + pipeOutput += `${key}=${output[key]}\n` + } + } else { + throw new Error( + "Invalid input from git, check the credential helper is configured correctly", + ) + } + return pipeOutput +} + +export const gitCredential = new Command() + .name("git-credential") + .description( + "A git credentials helper for easier datalad or git-annex access to datasets.", + ) + .command("fill") + .action(async () => { + console.log(await gitCredentialAction()) + }) diff --git a/cli/src/commands/login.test.ts b/cli/src/commands/login.test.ts new file mode 100644 index 
000000000..613ed2b3d --- /dev/null +++ b/cli/src/commands/login.test.ts @@ -0,0 +1,33 @@ +import { assertEquals, assertSpyCalls, Select, stub } from "../deps.ts" +import { loginAction } from "./login.ts" + +Deno.test("login action supports non-interactive mode if all options are provided", async () => { + const SelectStub = stub(Select, "prompt", () => { + return new Promise(() => {}) + }) + await loginAction({ + url: "https://example.com", + token: "1234", + errorReporting: false, + }) + // Test to make sure we get here before the timeout + assertSpyCalls(SelectStub, 0) + SelectStub.restore() + localStorage.clear() +}) + +Deno.test("login action sets values in localStorage", async () => { + const loginOptions = { + url: "https://example.com", + token: "1234", + errorReporting: true, + } + await loginAction(loginOptions) + assertEquals(localStorage.getItem("url"), loginOptions.url) + assertEquals(localStorage.getItem("token"), loginOptions.token) + assertEquals( + localStorage.getItem("errorReporting"), + loginOptions.errorReporting.toString(), + ) + localStorage.clear() +}) diff --git a/cli/src/commands/login.ts b/cli/src/commands/login.ts new file mode 100644 index 000000000..290c3f190 --- /dev/null +++ b/cli/src/commands/login.ts @@ -0,0 +1,54 @@ +/** + * Configure credentials and other persistent settings for OpenNeuro + */ +import { Command, Confirm, Secret, Select } from "../deps.ts" +import type { CommandOptions } from "../deps.ts" + +const messages = { + url: + "URL for OpenNeuro instance to upload to (e.g. `https://openneuro.org`).", + token: "API key for OpenNeuro. See https://openneuro.org/keygen", + errorReporting: + "Enable error reporting. Errors and performance metrics are sent to the configured OpenNeuro instance.", +} + +export async function loginAction(options: CommandOptions) { + const url = options.url ? options.url : await Select.prompt({ + message: "Choose an OpenNeuro instance to use.", + options: [ + "https://openneuro.org", + "https://staging.openneuro.org", + "http://localhost:9876", + ], + }) + localStorage.setItem("url", url) + let token + // Environment variable + if (options.openneuroApiKey) { + token = options.openneuroApiKey + } + // Command line + if (options.token) { + token = options.token + } + if (!token) { + token = await Secret.prompt( + `Enter your API key for OpenNeuro (get an API key from ${url}/keygen).`, + ) + } + localStorage.setItem("token", token) + const errorReporting = Object.hasOwn(options, "errorReporting") + ? options.errorReporting + : await Confirm.prompt(messages.errorReporting) + localStorage.setItem("errorReporting", errorReporting.toString()) +} + +export const login = new Command() + .name("login") + .description( + "Setup credentials for OpenNeuro. 
Set -u, -t, and -e flags to skip interactive prompts.", + ) + .option("-u, --url <url>", messages.url) + .option("-t, --token <token>", messages.token) + .option("-e, --error-reporting <errorReporting:boolean>", messages.errorReporting) + .action(loginAction) diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts new file mode 100644 index 000000000..0f89307a9 --- /dev/null +++ b/cli/src/commands/upload.ts @@ -0,0 +1,202 @@ +import { + consoleFormat, + readFileTree, + validate, + validateCommand, +} from "../bids_validator.ts" +import { logger } from "../logger.ts" +import { + Confirm, + join, + prompt, + relative, + resolve, + walk, +} from "../deps.ts" +import type { CommandOptions } from "../deps.ts" +import { getRepoAccess } from "./git-credential.ts" +import { readConfig } from "../config.ts" +import { createDataset } from "../graphq.ts" + +async function getRepoDir(url: URL): Promise<string> { + const LOCAL_STORAGE_KEY = `openneuro_cli_${url.hostname}_` + const repoDir = localStorage.getItem(LOCAL_STORAGE_KEY) + if (repoDir) { + return repoDir + } else { + const tmpDir = await Deno.makeTempDir({ prefix: LOCAL_STORAGE_KEY }) + localStorage.setItem(LOCAL_STORAGE_KEY, tmpDir) + return tmpDir + } +} + +/** + * Add all files to a setup git worker + * @param worker The worker to use for this + * @param dataset_directory_abs An absolute path on the local system to upload files from (dataset root) + */ +export async function addGitFiles( + worker: Worker, + dataset_directory_abs: string, +) { + // Upload all files + for await ( + const walkEntry of walk(dataset_directory_abs, { + includeDirs: false, + includeSymlinks: false, + }) + ) { + const relativePath = relative(dataset_directory_abs, walkEntry.path) + if (relativePath === ".bidsignore" || relativePath === ".gitattributes" || !relativePath.startsWith(".")) { + worker.postMessage({ + "command": "add", + "path": walkEntry.path, + "relativePath": relativePath, + }) + } else { + logger.warn(`Skipped file "${relativePath}"`) + } + } +} + +export async function uploadAction( + options: CommandOptions, + dataset_directory: string, +) { + const clientConfig = readConfig() + const serverUrl = new URL(clientConfig.url) + const repoDir = await getRepoDir(serverUrl) + const dataset_directory_abs = resolve(dataset_directory) + logger.info( + `upload ${dataset_directory} resolved to ${dataset_directory_abs}`, + ) + + const schemaResult = await validate( + await readFileTree(dataset_directory_abs), + options, + ) + console.log(consoleFormat(schemaResult)) + + for (const issue of schemaResult.issues.values()) { + if (issue.severity === "error") { + console.log("Please correct validation errors before uploading.") + return + } + } + console.log("Validation complete, preparing upload.") + + let datasetId + if (options.dataset) { + datasetId = options.dataset + } else { + if (!options.create) { + const confirmation = await prompt([ + { + name: "create", + message: "Create a new dataset?", + type: Confirm, + }, + ]) + if (!confirmation.create) { + console.log("Specify --dataset to upload to an existing dataset.") + return + } + } + let affirmedDefaced = options.affirmDefaced + let affirmedConsent = options.affirmConsent + if (affirmedDefaced || affirmedConsent) { + datasetId = await createDataset(affirmedDefaced, affirmedConsent) + } else { + const affirmed = await prompt([ + { + name: "affirmedDefaced", + message: + "All structural scans have been defaced, obscuring any tissue on or near the face that could potentially be used to reconstruct the facial 
structure.", + type: Confirm, + }, + { + name: "affirmedConsent", + message: + "I have explicit participant consent and ethical authorization to publish structural scans without defacing.", + type: Confirm, + }, + ]) + affirmedDefaced = affirmed.affirmedDefaced + affirmedConsent = affirmed.affirmedConsent + if (affirmedDefaced || affirmedConsent) { + datasetId = await createDataset(affirmedDefaced, affirmedConsent) + } else { + console.log( + "You must affirm defacing or consent to upload without defacing to continue.", + ) + return + } + } + } + + // Create the git worker + const worker = new Worker(new URL("../worker/git.ts", import.meta.url).href, { + type: "module", + }) + + const repoPath = join(repoDir, datasetId) + const { token, endpoint } = await getRepoAccess(datasetId) + await Deno.mkdir(repoPath, { recursive: true }) + // Configure worker + worker.postMessage({ + "command": "setup", + "datasetId": datasetId, + "sourcePath": dataset_directory_abs, + "repoPath": repoPath, + "repoEndpoint": `${clientConfig.url}/git/${endpoint}/${datasetId}`, + "authorization": token, + "logLevel": logger.levelName, + }) + + logger.info(`Repo path: ${join(repoDir, datasetId)}`) + worker.postMessage({ + "command": "clone", + }) + + // Upload all files + await addGitFiles(worker, dataset_directory_abs) + + // Generate a commit + worker.postMessage({ command: "commit" }) + + // Push git/annexed data + worker.postMessage({ command: "push" }) + + // Close after all tasks are queued + worker.postMessage({ command: "done" }) +} + +/** + * Upload is validate extended with upload features + */ +export const upload = validateCommand + .name("upload") + .description("Upload a dataset to OpenNeuro") + .option("--json", "Hidden for upload usage", { hidden: true, override: true }) + .option("--filenameMode", "Hidden for upload usage", { + hidden: true, + override: true, + }) + .option("-d, --dataset <dataset:string>", "Specify an existing dataset to update.", { + conflicts: ["create"], + }) + .option("-c, --create", "Skip confirmation to create a new dataset.", { + conflicts: ["dataset"], + }) + .option( + "--affirmDefaced", + "All structural scans have been defaced, obscuring any tissue on or near the face that could potentially be used to reconstruct the facial structure.", + { default: false }, + ) + .option( + "--affirmConsent", + "I have explicit participant consent and ethical authorization to publish structural scans without defacing.", + { default: false }, + ) + .action(uploadAction) diff --git a/cli/src/config.ts b/cli/src/config.ts new file mode 100644 index 000000000..c3486dcbc --- /dev/null +++ b/cli/src/config.ts @@ -0,0 +1,40 @@ +import { logger } from "./logger.ts" +import { LoginError } from "./error.ts" + +export interface ClientConfig { + url: string + token: string + errorReporting: boolean +} + +/** + * Get credentials from local storage + */ +export function getConfig(): ClientConfig { + const url = localStorage.getItem("url") + const token = localStorage.getItem("token") + const errorReporting = localStorage.getItem("errorReporting") === "true" + if (url && token) { + const config: ClientConfig = { + url, + token, + errorReporting, + } + return config + } else { + throw new LoginError("Run `openneuro login` before upload.") + } +} + +export function readConfig(): ClientConfig { + const config = getConfig() + logger.info( + `configured with URL "${config.url}" and token "${ + config.token.slice( + 0, + 4, + ) + }...${config.token.slice(-4)}"`, + ) + return config +} diff --git 
a/cli/src/deps.ts b/cli/src/deps.ts new file mode 100644 index 000000000..2e5f34b5f --- /dev/null +++ b/cli/src/deps.ts @@ -0,0 +1,67 @@ +// Cliffy +export { + Command, + EnumType, +} from "https://deno.land/x/cliffy@v1.0.0-rc.3/command/mod.ts" +export { + Confirm, + prompt, + Secret, + Select, +} from "https://deno.land/x/cliffy@v1.0.0-rc.3/prompt/mod.ts" +export type { + ActionHandler, + CommandOptions, +} from "https://deno.land/x/cliffy@v1.0.0-rc.3/command/mod.ts" +// bids-validator +export * as bidsValidator from "https://deno.land/x/bids_validator@v1.14.0/main.ts" +export { validateCommand } from "https://deno.land/x/bids_validator@v1.14.0/setup/options.ts" +export type { ValidatorOptions } from "https://deno.land/x/bids_validator@v1.14.0/setup/options.ts" +// Logging +export { + critical, + debug, + error, + getLogger, + handlers, + info, + Logger, + LogLevels, + setup, + warning, +} from "https://deno.land/std@0.212.0/log/mod.ts" +export * as log from "https://deno.land/std@0.212.0/log/mod.ts" +export { LogLevelNames } from "https://deno.land/std@0.212.0/log/levels.ts" +export type { LevelName } from "https://deno.land/std@0.212.0/log/mod.ts" +export { TextLineStream } from "https://deno.land/std@0.212.0/streams/mod.ts" +// File handling +export { walk } from "https://deno.land/std@0.212.0/fs/walk.ts" +export { resolve } from "https://deno.land/std@0.212.0/path/resolve.ts" +export { relative } from "https://deno.land/std@0.212.0/path/relative.ts" +export { join } from "https://deno.land/std@0.212.0/path/join.ts" +export { extname } from "https://deno.land/std@0.212.0/path/extname.ts" +export { basename } from "https://deno.land/std@0.212.0/path/basename.ts" +export { dirname } from "https://deno.land/std@0.212.0/path/dirname.ts" +export { SEPARATOR } from "https://deno.land/std@0.221.0/path/mod.ts" +// Test suites +export { + assert, + assertArrayIncludes, + assertEquals, + assertObjectMatch, + assertStrictEquals, +} from "https://deno.land/std@0.212.0/assert/mod.ts" +export { + assertSpyCallArgs, + assertSpyCalls, + restore, + returnsNext, + stub, +} from "https://deno.land/std@0.212.0/testing/mock.ts" +// Progress bars +export { default as ProgressBar } from "https://deno.land/x/progress@v1.3.9/mod.ts" +// Ignore library +export { default as ignore } from "npm:ignore@5.3.0" +export { encodeBase64 } from "https://deno.land/std@0.220.1/encoding/base64.ts" +// Isomorphic git +export { default as git, STAGE, TREE } from "npm:isomorphic-git@1.25.6" diff --git a/cli/src/error.ts b/cli/src/error.ts new file mode 100644 index 000000000..91fc673da --- /dev/null +++ b/cli/src/error.ts @@ -0,0 +1,4 @@ +export class OpenNeuroCLIError extends Error {} +export class LoginError extends OpenNeuroCLIError {} +export class QueryError extends OpenNeuroCLIError {} +export class OptionError extends OpenNeuroCLIError {} diff --git a/cli/src/gitattributes.test.ts b/cli/src/gitattributes.test.ts new file mode 100644 index 000000000..087a44159 --- /dev/null +++ b/cli/src/gitattributes.test.ts @@ -0,0 +1,54 @@ +import { assertEquals, assertObjectMatch } from "./deps.ts" +import { matchGitAttributes, parseGitAttributes } from "./gitattributes.ts" + +const testAttributes = `* annex.backend=SHA256E +**/.git* annex.largefiles=nothing +*.bval annex.largefiles=nothing +*.bvec annex.largefiles=nothing +*.json annex.largefiles=largerthan=1mb +phenotype/*.tsv annex.largefiles=anything +*.tsv annex.largefiles=largerthan=1mb +dataset_description.json annex.largefiles=nothing +.bidsignore annex.largefiles=nothing 
+CHANGES annex.largefiles=nothing +README* annex.largefiles=nothing +LICENSE annex.largefiles=nothing annex.backend=MD5E +` + +Deno.test("parseGitAttributes() parses a git-annex .gitattributes file", async () => { + const parsed = parseGitAttributes(testAttributes) + assertObjectMatch(parsed, { + "*": { backend: "SHA256E" }, + "**/.git*": { + largefiles: Infinity, + }, + "*.bval": { largefiles: Infinity }, + "*.bvec": { largefiles: Infinity }, + "*.json": { largefiles: 1024 * 1024 }, + "phenotype/*.tsv": { largefiles: 0 }, + "*.tsv": { largefiles: 1024 * 1024 }, + "dataset_description.json": { + largefiles: Infinity, + }, + ".bidsignore": { largefiles: Infinity }, + "CHANGES": { largefiles: Infinity }, + "README*": { largefiles: Infinity }, + "LICENSE": { largefiles: Infinity, backend: "MD5E" }, + }) +}) + +Deno.test("matchGitAttributes() matches any relevant rules for a path", async () => { + const attr = parseGitAttributes(testAttributes) + assertEquals(matchGitAttributes(attr, "derivatives/test_file.json"), { + backend: "SHA256E", + largefiles: 1024 * 1024, + }) + assertEquals(matchGitAttributes(attr, "dataset_description.json"), { + backend: "SHA256E", + largefiles: Infinity, + }) + assertEquals(matchGitAttributes(attr, "LICENSE"), { + backend: "MD5E", + largefiles: Infinity, + }) +}) diff --git a/cli/src/gitattributes.ts b/cli/src/gitattributes.ts new file mode 100644 index 000000000..f67ee02d0 --- /dev/null +++ b/cli/src/gitattributes.ts @@ -0,0 +1,96 @@ +import { ignore } from "./deps.ts" + +/** + * Git annex supports many backends, we support a limited subset used by OpenNeuro (for now) + * https://git-annex.branchable.com/backends/ + */ +export type GitAnnexBackend = "GIT" | "SHA256" | "SHA256E" | "MD5" | "MD5E" + +/** + * Annex attributes for one path + */ +export interface GitAnnexAttributeOptions { + largefiles?: number + backend?: GitAnnexBackend + match: ignore.Ignore +} + +/** + * Minimal parsing of .gitattributes for uploader usage + */ +export type GitAnnexAttributes = Record<string, GitAnnexAttributeOptions> + +/** + * Parse any relevant annex options from .gitattributes + * @param gitattributes A .gitattributes file in string format + */ +export function parseGitAttributes(gitattributes: string): GitAnnexAttributes { + const attributesObject: GitAnnexAttributes = {} + for (const line of gitattributes.split("\n")) { + if (line.length < 3) { + continue + } + const [prefix, ...rest] = line.split(" ") + attributesObject[prefix] = { + match: ignore.default().add(prefix), + } + for (const attr of rest) { + const eqIndex = attr.indexOf("=") + const key = attr.substring(0, eqIndex) + const value = attr.substring(eqIndex + 1) + if (key === "annex.largefiles") { + if (value === "nothing") { + attributesObject[prefix].largefiles = Infinity + } else if (value === "anything") { + attributesObject[prefix].largefiles = 0 + } else if (value.startsWith("largerthan=")) { + const size = value.split("largerthan=")[1].toLowerCase() + if (size.endsWith("kb")) { + attributesObject[prefix].largefiles = Number(size.slice(0, -2)) * + 1024 + } else if (size.endsWith("mb")) { + attributesObject[prefix].largefiles = Number(size.slice(0, -2)) * + 1024 * 1024 + } else if (size.endsWith("gb")) { + attributesObject[prefix].largefiles = Number(size.slice(0, -2)) * + 1024 * 1024 * 1024 + } else if (size.endsWith("tb")) { + attributesObject[prefix].largefiles = Number(size.slice(0, -2)) * + 1024 * 1024 * 1024 * 1024 + } else { + attributesObject[prefix].largefiles = Number(size) + } + } + } else if (key === "annex.backend") { 
attributesObject[prefix].backend = value as GitAnnexBackend + } + } + } + return attributesObject +} + +interface MatchingAnnexAttributes { + backend?: GitAnnexBackend + largefiles?: number +} + +/** + * Return any matching values merged for a given path + */ +export function matchGitAttributes( + attributes: GitAnnexAttributes, + path: string, +) { + const matching: MatchingAnnexAttributes = {} + for (const [prefix, attr] of Object.entries(attributes)) { + if (attr.match.test(path).ignored == true) { + if ("backend" in attr) { + matching.backend = attr.backend + } + if ("largefiles" in attr) { + matching.largefiles = attr.largefiles + } + } + } + return matching +} diff --git a/cli/src/graphq.ts b/cli/src/graphq.ts new file mode 100644 index 000000000..a165d32ce --- /dev/null +++ b/cli/src/graphq.ts @@ -0,0 +1,91 @@ +/** + * Minimalist OpenNeuro client with no dependencies + */ + +import { getConfig } from "./config.ts" +import { QueryError } from "./error.ts" + +function request(query: string, variables = {}): Promise<Response> { + const config = getConfig() + return fetch(`${config.url}/crn/graphql`, { + method: "POST", + headers: { + Authorization: `Bearer ${config.token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ query, variables }), + }) +} + +const createDatasetMutation = ` +mutation($affirmedConsent: Boolean, $affirmedDefaced: Boolean) { + createDataset(affirmedConsent: $affirmedConsent, affirmedDefaced: $affirmedDefaced) { + id + } +} +` + +interface CreateDatasetMutationResponse { + data?: { + createDataset: { + id: string + } + } + error?: { + message: string + } +} + +/** + * Create a new dataset + * @param affirmedDefaced Has the uploader affirmed this dataset is defaced? + * @param affirmedConsent Has the uploader affirmed they have obtained participant consent to share non-defaced images? + * @returns Dataset ID + */ +export async function createDataset( + affirmedDefaced: boolean, + affirmedConsent: boolean, +): Promise<string> { + const res = await request(createDatasetMutation, { + affirmedDefaced, + affirmedConsent, + }) + const body: CreateDatasetMutationResponse = await res.json() + if (body.error) { + throw new QueryError(JSON.stringify(body.error)) + } + if (body.data) { + return body?.data?.createDataset?.id + } else { + throw new QueryError("Invalid response") + } +} + +const prepareUploadMutation = ` +mutation($datasetId: ID!, $uploadId: ID!) { + prepareUpload(datasetId: $datasetId, uploadId: $uploadId) { + id + } +} +` + +/** + * Setup an upload on the server side + * @param datasetId Accession number `e.g. ds000001` + * @param uploadId UUID for the upload if this is a resume operation + * @returns The UUID for this upload + */ +export async function prepareUpload( + datasetId: string, + uploadId: string | undefined, +) { + const uuid = uploadId ? 
uploadId : crypto.randomUUID() + await request(prepareUploadMutation, { + datasetId, + uploadId: uuid, + }) + return uuid +} + +export async function finishUpload() { +} diff --git a/cli/src/logger.ts b/cli/src/logger.ts new file mode 100644 index 000000000..b81eb76a4 --- /dev/null +++ b/cli/src/logger.ts @@ -0,0 +1,49 @@ +import { getLogger, handlers, LevelName, Logger, setup } from "./deps.ts" + +const loggerName = "@openneuro/cli" + +/** + * Setup a console logger used with the --debug flag + */ +export function setupLogging(level: LevelName) { + setup({ + handlers: { + console: new handlers.ConsoleHandler(level), + }, + + loggers: { + "@openneuro/cli": { + level, + handlers: ["console"], + }, + }, + }) +} + +export function parseStack(stack: string) { + const lines = stack.split("\n") + const caller = lines[2].trim() + const token = caller.split("at ") + return token[1] +} + +const loggerProxyHandler = { + // deno-lint-ignore no-explicit-any + get: function (_: any, prop: keyof Logger) { + const logger = getLogger(loggerName) + if (prop === "levelName" || prop === "level") { + return logger[prop] + } + const stack = new Error().stack + if (stack) { + const callerLocation = parseStack(stack) + logger.debug(`Logger invoked at "${callerLocation}"`) + } + const logFunc = logger[prop] as typeof logger.warning + return logFunc.bind(logger) + }, +} + +const logger = new Proxy(getLogger(loggerName), loggerProxyHandler) + +export { logger } diff --git a/cli/src/options.ts b/cli/src/options.ts new file mode 100644 index 000000000..7e6a42372 --- /dev/null +++ b/cli/src/options.ts @@ -0,0 +1,52 @@ +import { + Command, + EnumType, + LevelName, + LogLevelNames, + ValidatorOptions, +} from "./deps.ts" + +import { setupLogging } from "./logger.ts" +import { login } from "./commands/login.ts" +import { upload } from "./commands/upload.ts" +import { download } from "./commands/download.ts" +import { gitCredential } from "./commands/git-credential.ts" + +export type OpenNeuroOptions = { + datasetPath?: string + validatorOptions?: ValidatorOptions + debug: LevelName +} + +const openneuroCommand = new Command() + .name("openneuro") + .description( + "OpenNeuro command line tools for uploading, downloading, or syncing datasets. See https://docs.openneuro.org for detailed guides.", + ) + // TODO - Sync this with the node packages + .version("4.20.4") + .globalType("debugLevel", new EnumType(LogLevelNames)) + .globalEnv("OPENNEURO_LOG=<level:debugLevel>", "Enable debug output.") + .globalAction(({ openneuroLog }) => { + setupLogging(openneuroLog ? 
openneuroLog : "ERROR") + }) + .globalEnv("OPENNEURO_API_KEY=<key:string>", "Specify an OpenNeuro API key.") + .command("login", login) + .command("download", download) + .command("upload", upload) + .command("git-credential", gitCredential) + +/** + * Parse command line options and return an OpenNeuroOptions config + * @param argumentOverride Override the arguments instead of using Deno.args + */ +export async function commandLine( + argumentOverride: string[], +): Promise<OpenNeuroOptions> { + const { args, options } = await openneuroCommand.parse(argumentOverride) + + return { + datasetPath: args[0], + ...options, + } +} diff --git a/cli/src/tests/fetch-stub.test.ts b/cli/src/tests/fetch-stub.test.ts new file mode 100644 index 000000000..828b05e9d --- /dev/null +++ b/cli/src/tests/fetch-stub.test.ts @@ -0,0 +1,19 @@ +import { assertEquals } from "../deps.ts" +import { assertSpyCallArgs, assertSpyCalls } from "../deps.ts" +import { mockFetch } from "./fetch-stub.ts" + +Deno.test({ + name: "fetch test", + async fn() { + const mockFetchText = "__MOCK_FETCH__" + const fetchStub = mockFetch(new Response(mockFetchText)) + try { + const res = await fetch("foo.com") + assertEquals(await res.text(), mockFetchText) + assertSpyCallArgs(fetchStub, 0, ["foo.com"]) + assertSpyCalls(fetchStub, 1) + } finally { + fetchStub.restore() + } + }, +}) diff --git a/cli/src/tests/fetch-stub.ts b/cli/src/tests/fetch-stub.ts new file mode 100644 index 000000000..61dc8f7e1 --- /dev/null +++ b/cli/src/tests/fetch-stub.ts @@ -0,0 +1,9 @@ +import { stub } from "../deps.ts" + +export function mockFetch(response: Response) { + return stub( + globalThis, + "fetch", + () => Promise.resolve(response), + ) +} diff --git a/cli/src/worker/git.test.ts b/cli/src/worker/git.test.ts new file mode 100644 index 000000000..9182d1994 --- /dev/null +++ b/cli/src/worker/git.test.ts @@ -0,0 +1,150 @@ +import { annexRelativePath, hashDirLower, hashDirMixed } from "./git.ts" +import { assertArrayIncludes, assertEquals, git, join, walk, SEPARATOR } from "../deps.ts" +import { addGitFiles } from "../commands/upload.ts" +import fs from "node:fs" + +Deno.test("annexRelativePath() returns appropriate paths", () => { + assertEquals(annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz"), join("..", "..")) +}) + +Deno.test("hashDirLower() returns the correct key prefix", async () => { + assertEquals( + await hashDirLower( + "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz", + ), + ["2ed", "6ea"], + ) +}) + +Deno.test("hashDirMixed() returns the correct key prefix", async () => { + assertEquals( + await hashDirMixed( + "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz", + ), + ["Xk", "Mx"], + ) +}) + +Deno.test("adds git and annexed content given a directory of files", async () => { + const testUpload = await Deno.makeTempDir() + const testRepo = await Deno.makeTempDir() + const testUrl = "https://example.com/repo.git" + + await git.init({ + fs, + dir: testRepo, + defaultBranch: "main", + }) + + const textEncoder = new TextEncoder() + + // Add .gitattributes directly here (add requires it) + await Deno.writeFile( + join(testRepo, ".gitattributes"), + textEncoder.encode(`* annex.backend=SHA256E +**/.git* annex.largefiles=nothing +*.bval annex.largefiles=nothing +*.bvec annex.largefiles=nothing +*.json annex.largefiles=largerthan=1mb +*.tsv annex.largefiles=largerthan=1mb +dataset_description.json annex.largefiles=nothing +.bidsignore annex.largefiles=nothing +CHANGES annex.largefiles=nothing +README* 
annex.largefiles=nothing +LICENSE annex.largefiles=nothing`), + ) + await git.add({ fs, dir: testRepo, filepath: ".gitattributes" }) + await git.commit({ + fs, + dir: testRepo, + author: { + name: "OpenNeuro", + email: "git@openneuro.org", + }, + message: "Test suite repo, please ignore", + }) + + // dataset_description.json + await Deno.writeFile( + join(testUpload, "dataset_description.json"), + textEncoder.encode(JSON.stringify({ + "Name": "Test Experiment", + "BIDSVersion": "1.8.0", + "DatasetType": "raw", + "License": "CC0", + "Authors": [ + "J. Doe", + "J. Doe", + ], + })), + ) + + // An annexed nifti file + const fakeNifti = new Uint8Array(65536) + crypto.getRandomValues(fakeNifti) + await Deno.mkdir(join(testUpload, "sub-01", "anat"), { recursive: true }) + await Deno.writeFile( + join(testUpload, "sub-01", "anat", "sub-01_T1w.nii.gz"), + fakeNifti, + ) + + // Create the git worker + const worker = new Worker(new URL("../worker/git.ts", import.meta.url).href, { + type: "module", + }) + + // Configure worker + worker.postMessage({ + "command": "setup", + "datasetId": "test_dataset", + "repoPath": testRepo, + "repoEndpoint": testUrl, + "authorization": + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJmNjZhNzRjNS05ZDhmLTQ2M2MtOGE2ZS1lYTE3ODljYTNiOTIiLCJlbWFpbCI6Im5lbGxAZGV2LW5lbGwuY29tIiwicHJvdmlkZXIiOiJnb29nbGUiLCJuYW1lIjoiTmVsbCBIYXJkY2FzdGxlIiwiYWRtaW4iOnRydWUsImlhdCI6MTcwMDUyNDIzNCwiZXhwIjoxNzMyMDYwMjM0fQ.5glc_uoxqcRJ4KWn2EvRR0hH-ono2MPJH0wqvcXBIOg", + "logLevel": "INFO", + }) + + await addGitFiles(worker, testUpload) + + // Setup a way to make sure the worker is finished + const closedPromise = new Promise((resolve) => { + worker.onmessage = (event) => { + if (event.data.command === "closed") { + resolve(true) + } + } + }) + + worker.postMessage({ command: "commit" }) + + // Close after all tasks are queued + worker.postMessage({ command: "done" }) + + // Wait until the worker says it's closed + await closedPromise + + const expectedFiles = [ + join(".git", "refs", "heads", "main"), + join(".git", "config"), + join(".git", "HEAD"), + join(".git", "index"), + ".gitattributes", + "dataset_description.json", + join("sub-01", "anat", "sub-01_T1w.nii.gz") + ] + let gitObjects = 0 + for await ( + const walkEntry of walk(testRepo, { + includeDirs: false, + includeSymlinks: true, + }) + ) { + const relativePath = walkEntry.path.split(testRepo + SEPARATOR)[1] + if (relativePath.startsWith(`.git${SEPARATOR}objects${SEPARATOR}`)) { + gitObjects += 1 + } else { + assertArrayIncludes(expectedFiles, [relativePath]) + } + } + assertEquals(gitObjects, 9) +}) diff --git a/cli/src/worker/git.ts b/cli/src/worker/git.ts new file mode 100644 index 000000000..0fd8f7e89 --- /dev/null +++ b/cli/src/worker/git.ts @@ -0,0 +1,470 @@ +import { git, STAGE, TREE } from "../deps.ts" +import http from "npm:isomorphic-git@1.25.3/http/node/index.js" +import fs from "node:fs" +import { decode } from "https://deno.land/x/djwt@v3.0.1/mod.ts" +import { + GitAnnexAttributes, + GitAnnexBackend, + matchGitAttributes, + parseGitAttributes, +} from "../gitattributes.ts" +import { basename, dirname, join, LevelName, relative } from "../deps.ts" +import { logger, setupLogging } from "../logger.ts" +import { PromiseQueue } from "./queue.ts" +import { checkKey, storeKey } from "./transferKey.ts" +import { ProgressBar } from "../deps.ts" + +/** + * Why are we using hash wasm over web crypto? + * Web crypto cannot do streaming hashes of the common git-annex functions yet. 
+ */ +import { createMD5, createSHA256 } from "npm:hash-wasm" + +interface GitContext { + // Current working dataset + datasetId: string + // The local path files are uploaded from + sourcePath: string + // The path of our local clone (possibly in virtual fs) + repoPath: string + // URL for the remote git repo + repoEndpoint: string + // OpenNeuro git access short-lived API key + authorization: string + // Author name + name: string + // Author email + email: string +} + +/** + * Events with no arguments + */ +interface GitWorkerEventGeneric { + data: { + command: "clone" | "commit" | "done" | "push" + } +} + +interface GitWorkerEventSetupData extends GitContext { + command: "setup" + logLevel: LevelName +} + +/** Setup event to set dataset and repo state for commands until next call */ +interface GitWorkerEventSetup { + data: GitWorkerEventSetupData +} + +/** Add event to add one file */ +interface GitWorkerEventAdd { + data: { + command: "add" + // Absolute path on the local system + path: string + // Dataset relative path + relativePath: string + } +} + +type GitWorkerEvent = + | GitWorkerEventSetup + | GitWorkerEventGeneric + | GitWorkerEventAdd + +let context: GitContext +let attributesCache: GitAnnexAttributes + +/** + * Paths to upload to the remote annex + */ +const annexKeys: Record<string, string> = {} + +async function done() { + logger.info("Git worker shutdown.") + // @ts-ignore + await globalThis.postMessage({ + command: "closed", + }) + await globalThis.close() +} + +function gitOptions(dir: string) { + return { + fs, + http, + dir, + url: context.repoEndpoint, + headers: { + Authorization: `Bearer ${context.authorization}`, + }, + } +} + +/** + * Clone or fetch the draft + */ +async function update() { + const options = gitOptions(context.repoPath) + try { + await fs.promises.access(join(context.repoPath, ".git")) + logger.info( + `Fetching ${context.datasetId} draft from "${context.repoEndpoint}"`, + ) + await git.fetch(options) + } catch (_err) { + logger.info( + `Cloning ${context.datasetId} draft from "${context.repoEndpoint}"`, + ) + await git.clone({ + ...options, + }) + } + logger.info(`${context.datasetId} draft fetched!`) +} + +/** + * Load or return a cache copy of .gitattributes + */ +async function getGitAttributes(): Promise<GitAnnexAttributes> { + if (!attributesCache) { + const options = gitOptions(context.repoPath) + try { + const oid = await git.resolveRef({ ...options, ref: "main" }) || + await git.resolveRef({ ...options, ref: "master" }) + const rawAttributes = await git.readBlob({ + ...options, + oid, + filepath: ".gitattributes", + }) + const stringAttributes = new TextDecoder().decode(rawAttributes.blob) + attributesCache = parseGitAttributes(stringAttributes) + } catch (_err) { + logger.error( + "Dataset repository is missing .gitattributes and may be improperly initialized.", + ) + await done() + } + } + return attributesCache +} + +/** + * Decide if this incoming file is annexed or not + */ +async function shouldBeAnnexed( + relativePath: string, + size: number, +): Promise<GitAnnexBackend> { + const gitAttributes = await getGitAttributes() + const attributes = matchGitAttributes(gitAttributes, relativePath) + if (attributes.largefiles) { + if (size > attributes.largefiles && attributes.backend) { + return attributes.backend + } else { + return "GIT" + } + } + // No rules matched, default to annex + return "SHA256E" +} + +/** + * git-annex hashDirLower implementation based on https://git-annex.branchable.com/internals/hashing/ + * Compute the directory path from a git-annex filename + */ +export 
async function hashDirLower( + annexKey: string, +): Promise<[string, string]> { + const computeMD5 = await createMD5() + computeMD5.init() + computeMD5.update(annexKey) + const digest = computeMD5.digest("hex") + return [digest.slice(0, 3), digest.slice(3, 6)] +} + +/** + * Return the relative path to the .git/annex directory from a repo relative path + * + * Used for symlink path creation + */ +export function annexRelativePath(path: string) { + return relative(dirname(join("/", path)), "/") +} + +/** + * git-annex hashDirMixed implementation based on https://git-annex.branchable.com/internals/hashing/ + */ +export async function hashDirMixed( + annexKey: string, +): Promise<[string, string]> { + const computeMD5 = await createMD5() + computeMD5.init() + computeMD5.update(annexKey) + const digest = computeMD5.digest("binary") + const firstWord = new DataView(digest.buffer).getUint32(0, true) + const nums = Array.from({ length: 4 }, (_, i) => (firstWord >> (6 * i)) & 31) + const letters = nums.map( + (num) => "0123456789zqjxkmvwgpfZQJXKMVWGPF".charAt(num), + ) + return [`${letters[1]}${letters[0]}`, `${letters[3]}${letters[2]}`] +} + +const computeHashMD5 = await createMD5() +const computeHashSHA256 = await createSHA256() + +/** + * git-annex add equivalent + */ +async function add(event: GitWorkerEventAdd) { + const { size } = await fs.promises.stat(event.data.path) + const annexed = await shouldBeAnnexed( + event.data.relativePath, + size, + ) + if (annexed === "GIT") { + // Simple add case + const options = { + ...gitOptions(context.repoPath), + filepath: event.data.relativePath, + } + const targetPath = join(context.repoPath, event.data.relativePath) + // Verify parent directories exist + await fs.promises.mkdir(dirname(targetPath), { recursive: true }) + // Copy non-annexed files for git index creation + await fs.promises.copyFile(event.data.path, targetPath) + await git.add(options) + logger.info(`Add\t${event.data.relativePath}`) + } else { + // E in the backend means include the file extension + let extension = "" + if (annexed.endsWith("E")) { + const filename = basename(event.data.relativePath) + extension = filename.substring(filename.indexOf(".")) + } + // Compute hash + const computeHash = annexed.startsWith("MD5") + ? 
computeHashMD5 + : computeHashSHA256 + computeHash.init() + const stream = fs.createReadStream(event.data.path, { + highWaterMark: 1024 * 1024 * 10, + }) + for await (const data of stream) { + computeHash.update(data) + } + const digest = computeHash.digest("hex") + const annexKey = `${annexed}-s${size}--${digest}${extension}` + const annexPath = join( + ".git", + "annex", + "objects", + ...(await hashDirMixed(annexKey)), + annexKey, + annexKey, + ) + // Path to this file in our repo + const fileRepoPath = join(context.repoPath, event.data.relativePath) + + let link + let forceAdd = false + try { + // Test if the repo already has this object + link = await fs.promises.readlink(fileRepoPath) + } catch (_err) { + forceAdd = true + } + + // Calculate the relative symlink for our file + const symlinkTarget = join( + annexRelativePath(event.data.relativePath), + annexPath, + ) + + // Key has changed if the existing link points to another object + if (forceAdd || link !== symlinkTarget) { + // Upload this key after the git commit + annexKeys[annexKey] = event.data.path + // This object has a new annex hash, update the symlink and add it + // Verify parent directories exist + await fs.promises.mkdir(dirname(fileRepoPath), { recursive: true }) + // Remove the existing symlink or git file + await fs.promises.rm(fileRepoPath, { force: true }) + // Create our new symlink pointing at the right annex object + await fs.promises.symlink(symlinkTarget, fileRepoPath) + const options = { + ...gitOptions(context.repoPath), + filepath: event.data.relativePath, + } + await git.add(options) + logger.info(`Annexed\t${event.data.relativePath}`) + } else { + logger.info(`Unchanged\t${event.data.relativePath}`) + } + } +} + +/** + * Git repo specific token + */ +interface OpenNeuroGitToken { + sub: string + email: string + provider: string + name: string + admin: boolean + scopes: [string] + dataset: string + iat: number + exp: number +} + +/** + * `git commit` equivalent + */ +async function commit() { + const options = gitOptions(context.repoPath) + const decodedToken = decode(context.authorization) + const { email, name } = decodedToken[1] as OpenNeuroGitToken + let generateCommit = false + let changes = 0 + await git.walk({ + ...options, + trees: [TREE({ ref: "HEAD" }), STAGE()], + map: async function (filepath, [A, B]) { + if (await A?.type() === "blob" || await B?.type() === "blob") { + const Aoid = await A?.oid() + const Boid = await B?.oid() + let type = "equal" + if (Aoid !== Boid && Aoid !== undefined && Boid !== undefined) { + logger.info(`modified:\t${filepath}`) + type = "modify" + } + if (Aoid === undefined) { + logger.info(`new file:\t${filepath}`) + type = "add" + } + if (Boid === undefined) { + logger.info(`deleted:\t${filepath}`) + type = "remove" + } + if (type !== "equal") { + generateCommit = true + changes += 1 + } + } + }, + }) + if (generateCommit) { + console.log( + `Detected ${changes} change${changes === 1 ? 
"" : "s"}.`, + ) + const commitHash = await git.commit({ + ...options, + author: { + name, + email, + }, + message: "[OpenNeuro] Added local files", + }) + logger.info(`Committed as "${commitHash}"`) + } else { + console.log("No changes found, not uploading.") + workQueue.enqueue(done) + } +} + +/** + * `git push` and `git-annex copy --to=openneuro` + */ +async function push() { + let completed = 0 + const annexedObjects = Object.keys(annexKeys).length + const progress = new ProgressBar({ + title: `Transferring annexed files`, + total: annexedObjects, + }) + if (annexedObjects > 0) { + await progress.render(completed) + } + // Git-annex copy --to=openneuro + for (const [key, path] of Object.entries(annexKeys)) { + const checkKeyResult = await checkKey({ + url: context.repoEndpoint, + token: context.authorization, + }, key) + if (checkKeyResult) { + logger.info(`Skipping key "${key}" present on remote`) + } else { + let storeKeyResult = -1 + const maxRetries = 3 + let retries = maxRetries + while (storeKeyResult === -1 && retries > 0) { + retries -= 1 + storeKeyResult = await storeKey( + { + url: context.repoEndpoint, + token: context.authorization, + }, + key, + path, + ) + if (storeKeyResult === -1 && retries > 0) { + logger.warn(`Failed to transfer annex object "${key}" - retrying`) + } + } + if (storeKeyResult === -1) { + logger.error( + `Failed to transfer annex object "${key}" after ${maxRetries} attempts`, + ) + } else { + completed += 1 + await progress.render(completed) + logger.info( + `Stored ${storeKeyResult} bytes for key "${key}" from path "${path}"`, + ) + } + } + } + console.log("Pushing changes...") + // Git push + await git.push( + gitOptions(context.repoPath), + ) + const url = new URL(context.repoEndpoint) + console.log( + `Upload complete, visit your dataset at ${url.protocol}//${url.host}/datasets/${context.datasetId}`, + ) +} + +// Queue of tasks to perform in order +const workQueue = new PromiseQueue() + +// @ts-ignore Expected for workers +self.onmessage = (event: GitWorkerEvent) => { + if (event.data.command === "setup") { + context = { + datasetId: event.data.datasetId, + sourcePath: event.data.sourcePath, + repoPath: event.data.repoPath, + repoEndpoint: event.data.repoEndpoint, + authorization: event.data.authorization, + name: event.data.name, + email: event.data.email, + } + setupLogging(event.data.logLevel) + } else if (event.data.command === "clone") { + workQueue.enqueue(update) + } else if (event.data.command === "add") { + workQueue.enqueue(add, event) + } else if (event.data.command === "commit") { + workQueue.enqueue(commit) + } else if (event.data.command === "push") { + workQueue.enqueue(push) + } else if (event.data.command === "done") { + workQueue.enqueue(done) + } +} diff --git a/cli/src/worker/queue.test.ts b/cli/src/worker/queue.test.ts new file mode 100644 index 000000000..2021831a1 --- /dev/null +++ b/cli/src/worker/queue.test.ts @@ -0,0 +1,23 @@ +import { assertEquals } from "../deps.ts" +import { PromiseQueue } from "./queue.ts" + +Deno.test("PromiseQueue should execute promises in order", async () => { + const order: number[] = [] + const promiseQueue = new PromiseQueue() + + promiseQueue.enqueue(async () => { + await new Promise((resolve) => setTimeout(resolve, 10)) + order.push(1) + }) + promiseQueue.enqueue(async () => { + await new Promise((resolve) => setTimeout(resolve, 5)) + order.push(2) + }) + promiseQueue.enqueue(async () => { + order.push(3) + }) + + await new Promise((resolve) => setTimeout(resolve, 20)) + + assertEquals(order, [1, 2, 3]) +}) diff --git 
a/cli/src/worker/queue.ts b/cli/src/worker/queue.ts new file mode 100644 index 000000000..aa07b17cd --- /dev/null +++ b/cli/src/worker/queue.ts @@ -0,0 +1,23 @@ +export class PromiseQueue { + private queue: ((...args: any[]) => Promise<unknown>)[] = [] + private running = false + + enqueue(promiseFn: (...args: any[]) => Promise<unknown>, ...args: any[]) { + this.queue.push(async () => promiseFn(...args)) + this.processQueue() + } + + private async processQueue() { + if (this.running) return + + this.running = true + try { + while (this.queue.length > 0) { + const promiseFn = this.queue.shift() + if (promiseFn) await promiseFn() + } + } finally { + this.running = false + } + } +} diff --git a/cli/src/worker/transferKey.test.ts b/cli/src/worker/transferKey.test.ts new file mode 100644 index 000000000..deaf97371 --- /dev/null +++ b/cli/src/worker/transferKey.test.ts @@ -0,0 +1,181 @@ +import type { FetchOptions, TransferKeyState } from "./transferKey.ts" +import { + checkKey, + keyRequest, + removeKey, + retrieveKey, + storeKey, +} from "./transferKey.ts" +import { assertEquals, assertStrictEquals } from "../deps.ts" +import { mockFetch } from "../tests/fetch-stub.ts" + +Deno.test({ + name: "keyRequest() generates correct Request object", + fn() { + const state: TransferKeyState = { + url: "https://api.example.com", + token: "secret_token", + } + const key = "sample_git_annex_key" + const options: FetchOptions = { + method: "POST", + headers: { "Content-Type": "application/json" }, + } + + const result = keyRequest(state, key, options) + + assertEquals(result.method, "POST") + assertEquals( + result.url, + "https://api.example.com/annex/sample_git_annex_key", + ) + assertStrictEquals( + result.headers.get("Authorization"), + "Basic b3Blbm5ldXJvLWNsaTpzZWNyZXRfdG9rZW4=", + ) + assertStrictEquals(result.headers.get("Content-Type"), "application/json") + }, +}) + +Deno.test({ + name: "storeKey() uploads successfully", + async fn() { + const mocked = mockFetch(new Response("", { status: 200 })) + const testData = "test data here" + const tmpFilePath = await Deno.makeTempFile() + const textEncoder = new TextEncoder() + await Deno.writeFile(tmpFilePath, textEncoder.encode(testData)) + + try { + const testFileSize = testData.length + + const result = await storeKey( + { url: "http://localhost", token: "" }, + "key", + tmpFilePath, + ) + assertEquals(result, testFileSize) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "storeKey() handles upload failure", + async fn() { + const mocked = mockFetch(new Response("", { status: 500 })) + + try { + const result = await storeKey( + { url: "http://localhost", token: "" }, + "key", + "./deno.json", + ) + assertEquals(result, -1) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "retrieveKey() downloads successfully", + async fn() { + const testData = "test data here" + const tmpFilePath = await Deno.makeTempFile() + const mocked = mockFetch(new Response(testData, { status: 200 })) + + try { + const result = await retrieveKey( + { url: "http://localhost", token: "" }, + "key", + tmpFilePath, + ) + assertEquals(result, true) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "retrieveKey() handles download failure (e.g., 404)", + async fn() { + const mocked = mockFetch(new Response("", { status: 404 })) + try { + const result = await retrieveKey( + { url: "http://localhost", token: "" }, + "key", + "output.file", + ) + assertEquals(result, false) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: 
"checkKey() confirms key exists (status 200)", + async fn() { + const mocked = mockFetch(new Response("", { status: 200 })) + try { + const result = await checkKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, true) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "checkKey() identifies when key doesn't exist", + async fn() { + const mocked = mockFetch(new Response("", { status: 404 })) + try { + const result = await checkKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, false) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "removeKey() successful deletion (status 204)", + async fn() { + const mocked = mockFetch(new Response(null, { status: 204 })) + try { + const result = await removeKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, true) + } finally { + mocked.restore() + } + }, +}) + +Deno.test({ + name: "removeKey() handles failed deletion", + async fn() { + const mocked = mockFetch(new Response("", { status: 500 })) + try { + const result = await removeKey( + { url: "http://localhost", token: "" }, + "key", + ) + assertEquals(result, false) + } finally { + mocked.restore() + } + }, +}) diff --git a/cli/src/worker/transferKey.ts b/cli/src/worker/transferKey.ts new file mode 100644 index 000000000..773826756 --- /dev/null +++ b/cli/src/worker/transferKey.ts @@ -0,0 +1,148 @@ +import { logger } from "../logger.ts" +import { encodeBase64 } from "../deps.ts" + +/** Deno port of transferKey from Node.js CLI */ + +export interface TransferKeyState { + // Base URL + url: string + // Basic auth token for repos + token: string +} + +export interface FetchOptions { + method?: "GET" | "POST" | "PUT" | "DELETE" | string + headers?: { [key: string]: string } | Headers // Key-value pairs for request headers + body?: BodyInit +} + +/** + * Create a Request object for this url and key + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key git-annex key + * @param {object} options fetch options + * @returns {Request} Configured fetch Request object + */ +export function keyRequest( + state: TransferKeyState, + key: string, + options: FetchOptions, +) { + const headers = new Headers( + "headers" in options && options.headers || undefined, + ) + headers.set( + "Authorization", + "Basic " + encodeBase64(`openneuro-cli:${state.token}`), + ) + const requestUrl = `${state.url}/annex/${key}` + return new Request(requestUrl, { ...options, headers }) +} + +/** + * Call POST to upload a key to a remote + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key Git-annex key + * @param {string} file File path + */ +export async function storeKey( + state: TransferKeyState, + key: string, + file: string, +) { + let fileHandle + try { + fileHandle = await Deno.open(file) + const fileStat = await fileHandle.stat() + const requestOptions = { + method: "POST", + headers: { + "Content-Length": fileStat.size.toString(), + }, + } + const request = keyRequest(state, key, requestOptions) + const response = await fetch(request, { body: fileHandle.readable }) + if (response.status === 200) { + return fileStat.size + } else { + return -1 + } + } finally { + try { + fileHandle?.close() + } catch (err) { + if (err.name !== "BadResource") { + logger.error(err) + } + } + } +} + +/** + * Call GET to download a key 
from a remote + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key Git-annex key + * @param {string} file File path + */ +export async function retrieveKey( + state: TransferKeyState, + key: string, + file: string, +) { + try { + const request = keyRequest(state, key, { method: "GET" }) + const response = await fetch(request) + if (response.status === 200 && response.body) { + const fileHandle = await Deno.open(file, { write: true, create: true }) + await response.body.pipeTo(fileHandle.writable) + return true + } else { + return false + } + } catch (err) { + console.error(err) + return false + } +} + +/** + * Call HEAD to check if key exists on remote + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key + * @returns {Promise<boolean>} True if the key exists on the remote + */ +export async function checkKey(state: TransferKeyState, key: string) { + const request = keyRequest(state, key, { method: "HEAD" }) + const response = await fetch(request) + if (response.status === 200) { + return true + } else { + return false + } +} + +/** + * Call DELETE to remove a key from the remote + * @param {object} state + * @param {string} state.url Base URL + * @param {string} state.token Basic auth token for repos + * @param {string} key + * @returns {Promise<boolean>} True if the key was removed + */ +export async function removeKey(state: TransferKeyState, key: string) { + const request = keyRequest(state, key, { method: "DELETE" }) + const response = await fetch(request) + if (response.status === 204) { + return true + } else { + return false + } +} diff --git a/deno.json b/deno.json index 3e151e5ac..dc08c1f6c 100644 --- a/deno.json +++ b/deno.json @@ -12,5 +12,9 @@ "services/datalad/tests/.pytest_cache", "**/__pycache/**" ] + }, + "tasks": { + "tests": "deno test --allow-read --allow-write cli/", + "coverage": "deno test --allow-read --allow-write --coverage cli/ && deno coverage ./coverage --lcov > coverage.lcov" } } diff --git a/tsconfig.json b/tsconfig.json index 689f0dfaa..af8e1e466 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -9,7 +9,7 @@ "module": "commonjs", "moduleResolution": "node", "resolveJsonModule": true, - "lib": ["es2020", "dom", "webworker"], + "lib": ["es2021", "dom", "webworker"], "skipLibCheck": true, "incremental": true, "composite": true, diff --git a/vite.config.ts b/vite.config.ts index 9c7ef97e2..64b2d64a4 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -9,5 +9,6 @@ export default defineConfig({ clearMocks: true, maxConcurrency: 16, isolate: true, + exclude: ["./cli", "./.yarn"], }, })
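
Taken together, the upload path in this patch reduces to an ordered message sequence against the git worker, serialized by the `PromiseQueue`. A condensed sketch of that flow (the dataset ID, paths, endpoint, and token below are illustrative placeholders; message shapes follow `cli/src/worker/git.ts`):

```typescript
// Sketch: drive cli/src/worker/git.ts the way uploadAction does.
// Each postMessage enqueues work on the worker's internal PromiseQueue,
// so posting the commands in order is sufficient.
const worker = new Worker(
  new URL("./cli/src/worker/git.ts", import.meta.url).href,
  { type: "module" },
)

worker.postMessage({
  command: "setup",
  datasetId: "ds000001", // placeholder accession number
  sourcePath: "/data/ds000001", // placeholder local dataset root
  repoPath: "/tmp/openneuro-cli/ds000001", // placeholder clone location
  repoEndpoint: "https://openneuro.org/git/0/ds000001", // placeholder endpoint
  authorization: "short-lived-token", // from the prepareRepoAccess mutation
  logLevel: "INFO",
})
worker.postMessage({ command: "clone" }) // clone or fetch the draft
worker.postMessage({
  command: "add", // one message per file; addGitFiles() walks the dataset
  path: "/data/ds000001/dataset_description.json",
  relativePath: "dataset_description.json",
})
worker.postMessage({ command: "commit" }) // commit staged changes, if any
worker.postMessage({ command: "push" }) // transfer annex objects, then git push
worker.postMessage({ command: "done" }) // shut down after the queue drains
```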