Skip to content

Commit

Permalink
refactor(cli): Migrate annexAdd to its own module
Browse files Browse the repository at this point in the history
  • Loading branch information
nellh committed Apr 5, 2024
1 parent d96e63d commit c64aa47
Show file tree
Hide file tree
Showing 5 changed files with 291 additions and 228 deletions.
27 changes: 27 additions & 0 deletions cli/src/worker/annex.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { assertEquals, join } from "../deps.ts"
import { annexRelativePath, hashDirLower, hashDirMixed } from "./annex.ts"

Deno.test("annexRelativePath() returns appropriate paths", () => {
  // A file nested two directories deep needs two ".." segments
  const actual = annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz")
  const expected = join("..", "..")
  assertEquals(actual, expected)
})

Deno.test("hashDirLower() returns the correct key prefix", async () => {
  // Known annex key and the directory pair expected for it
  const annexKey =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const prefix = await hashDirLower(annexKey)
  assertEquals(prefix, ["2ed", "6ea"])
})

Deno.test("hashDirMixed() returns the correct key prefix", async () => {
  // Known annex key and the directory pair expected for it
  const annexKey =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const prefix = await hashDirMixed(annexKey)
  assertEquals(prefix, ["Xk", "Mx"])
})
144 changes: 144 additions & 0 deletions cli/src/worker/annex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import { GitWorkerContext } from "./types/git-context.ts"
import { basename, dirname, git, join, relative } from "../deps.ts"
import { logger } from "../logger.ts"

/**
* Why are we using hash wasm over web crypto?
* Web crypto cannot do streaming hashes of the common git-annex functions yet.
*/
import { createMD5, createSHA256 } from "npm:hash-wasm"

/**
 * Shared hasher instances, created once when the module loads.
 *
 * These are hasher objects (not factories): annexAdd picks one of them and
 * calls init() on it before streaming each file, so the same instance is
 * reset and reused across calls.
 */
const computeHashMD5 = await createMD5()
const computeHashSHA256 = await createSHA256()

/**
 * git-annex hashDirLower implementation based on https://git-annex.branchable.com/internals/hashing/
 *
 * Hashes the annex key with MD5 and splits the first six hex characters of
 * the digest into two three-character directory names.
 *
 * @param annexKey git-annex key (the annexed object's filename)
 * @returns Tuple of the two directory names, outermost first
 */
export async function hashDirLower(
  annexKey: string,
): Promise<[string, string]> {
  const md5 = await createMD5()
  md5.init()
  md5.update(annexKey)
  const hex = md5.digest("hex")
  const outerDir = hex.substring(0, 3)
  const innerDir = hex.substring(3, 6)
  return [outerDir, innerDir]
}

/**
 * git-annex hashDirMixed implementation based on https://git-annex.branchable.com/internals/hashing/
 *
 * Hashes the annex key with MD5, reads the first four digest bytes as a
 * little-endian 32-bit word, and maps four 5-bit values (taken at 6-bit
 * strides) onto a 32-character alphabet. Each pair of characters is swapped
 * to form the two directory names.
 *
 * @param annexKey git-annex key (the annexed object's filename)
 * @returns Tuple of the two directory names, outermost first
 */
export async function hashDirMixed(
  annexKey: string,
): Promise<[string, string]> {
  const alphabet = "0123456789zqjxkmvwgpfZQJXKMVWGPF"
  const computeMD5 = await createMD5()
  computeMD5.init()
  computeMD5.update(annexKey)
  const digest = computeMD5.digest("binary")
  // Honour the view's own offset and length so the correct bytes are read
  // even if the digest is a window onto a larger ArrayBuffer.
  const view = new DataView(
    digest.buffer,
    digest.byteOffset,
    digest.byteLength,
  )
  const firstWord = view.getUint32(0, true)
  // Unsigned shift keeps the word a uint32; the mask selects five bits
  const nums = Array.from(
    { length: 4 },
    (_, i) => (firstWord >>> (6 * i)) & 31,
  )
  const letters = nums.map((num) => alphabet.charAt(num))
  return [`${letters[1]}${letters[0]}`, `${letters[3]}${letters[2]}`]
}

/**
 * Return the relative path from the directory containing a repo relative
 * path back up to the repository root
 *
 * Used for symlink path creation
 *
 * @param path Repo relative path of a file
 * @returns Sequence of ".." segments (empty string for top-level files)
 */
export function annexRelativePath(path: string): string {
  // Anchor the path at "/" so it can be compared against the root itself
  const anchored = join("/", path)
  const parentDir = dirname(anchored)
  return relative(parentDir, "/")
}

/**
 * Add a file to a configured annex
 *
 * Streams the file through the selected hash, builds the git-annex key from
 * the digest, and compares the symlink at the repo path with the target for
 * that key. When they differ (or no symlink can be read) the path is replaced
 * with a fresh symlink and staged with git.
 *
 * @param annexKeys Object mapping annex key to the absolute source path; an entry is added for every key that was (re)linked so it can be uploaded after the git commit
 * @param hash Git annex hash string (e.g. MD5E or SHA256)
 * @param path Absolute path to the file being added
 * @param relativePath Repo relative path for file being added
 * @param size File size (to avoid additional stat call)
 * @param context GitWorkerContext objects
 * @returns true when the symlink was written and staged, false when the existing symlink already pointed at this key
 */
export async function annexAdd(
  annexKeys: Record<string, string>,
  hash: string,
  path: string,
  relativePath: string,
  size: number,
  context: GitWorkerContext,
): Promise<boolean> {
  // E in the backend means include the file extension
  let extension = ""
  if (hash.endsWith("E")) {
    const filename = basename(relativePath)
    const firstDot = filename.indexOf(".")
    // indexOf() is -1 when there is no dot, and substring(-1) would return
    // the whole filename; extensionless files must get no extension at all
    if (firstDot !== -1) {
      extension = filename.substring(firstDot)
    }
  }
  // Compute hash (anything that is not an MD5 backend is hashed as SHA256)
  // NOTE(review): these hashers are module-level instances, so overlapping
  // annexAdd calls would interleave updates — confirm callers serialize.
  const computeHash = hash.startsWith("MD5")
    ? computeHashMD5
    : computeHashSHA256
  computeHash.init()
  const stream = context.fs.createReadStream(path, {
    highWaterMark: 1024 * 1024 * 10,
  })
  for await (const data of stream) {
    computeHash.update(data)
  }
  const digest = computeHash.digest("hex")
  const annexKey = `${hash}-s${size}--${digest}${extension}`
  const annexPath = join(
    ".git",
    "annex",
    "objects",
    ...(await hashDirMixed(annexKey)),
    annexKey,
    annexKey,
  )
  // Path to this file in our repo
  const fileRepoPath = join(context.repoPath, relativePath)

  // Relative symlink target for this file
  const symlinkTarget = join(
    annexRelativePath(relativePath),
    annexPath,
  )

  // Test if the repo already has this object
  let link
  try {
    link = await context.fs.promises.readlink(fileRepoPath)
  } catch (_err) {
    // Best effort: any readlink failure leaves `link` unset, which never
    // equals the target, so the link is (re)created below
  }

  // Nothing to do when the existing link already points at this object
  if (link === symlinkTarget) {
    return false
  }

  // Upload this key after the git commit
  annexKeys[annexKey] = path
  // Verify parent directories exist
  await context.fs.promises.mkdir(dirname(fileRepoPath), { recursive: true })
  // Remove the existing symlink or git file
  await context.fs.promises.rm(fileRepoPath, { force: true })
  // Create our new symlink pointing at the right annex object
  await context.fs.promises.symlink(symlinkTarget, fileRepoPath)
  const options = {
    ...context.config(),
    filepath: relativePath,
  }
  await git.add(options)
  return true
}
23 changes: 0 additions & 23 deletions cli/src/worker/git.test.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,7 @@
import { annexRelativePath, hashDirLower, hashDirMixed } from "./git.ts"
import { assertArrayIncludes, assertEquals, git, join, walk, SEPARATOR } from "../deps.ts"
import { addGitFiles } from "../commands/upload.ts"
import fs from "node:fs"

Deno.test("annexRelativePath() returns appropriate paths", () => {
  // A file nested two directories deep needs two ".." segments
  const actual = annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz")
  const expected = join("..", "..")
  assertEquals(actual, expected)
})

Deno.test("hashDirLower() returns the correct key prefix", async () => {
  // Known annex key and the directory pair expected for it
  const annexKey =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const prefix = await hashDirLower(annexKey)
  assertEquals(prefix, ["2ed", "6ea"])
})

Deno.test("hashDirMixed() returns the correct key prefix", async () => {
  // Known annex key and the directory pair expected for it
  const annexKey =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const prefix = await hashDirMixed(annexKey)
  assertEquals(prefix, ["Xk", "Mx"])
})

Deno.test("adds git and annexed content given a directory of files", async () => {
const testUpload = await Deno.makeTempDir()
const testRepo = await Deno.makeTempDir()
Expand Down
Loading

0 comments on commit c64aa47

Please sign in to comment.