Skip to content

Commit

Permalink
Decode .tar.xz archives using liblzma (via WebAssembly) (#173)
Browse files Browse the repository at this point in the history
feat: support `.tar.xz` archives using liblzma (via WebAssembly)
  • Loading branch information
jmillikin authored Oct 24, 2024
1 parent 1b005c2 commit bf932c6
Show file tree
Hide file tree
Showing 11 changed files with 475 additions and 4 deletions.
22 changes: 21 additions & 1 deletion src/domain/release-archive.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import path from "node:path";
import { parse as parseUrl } from "node:url";
import tar from "tar";
import { UserFacingError } from "./error.js";
import { decompress as decompressXz } from "../infrastructure/xzdec/xzdec.js";

import { ModuleFile } from "./module-file.js";

Expand Down Expand Up @@ -61,7 +62,7 @@ export class ReleaseArchive {
public async extractModuleFile(): Promise<ModuleFile> {
this.extractDir = path.dirname(this._diskPath);

if (this._diskPath.endsWith(".tar.gz")) {
if (this.isSupportedTarball()) {
await this.extractReleaseTarball(this.extractDir);
} else if (this._diskPath.endsWith(".zip")) {
await this.extractReleaseZip(this.extractDir);
Expand All @@ -81,7 +82,26 @@ export class ReleaseArchive {
return new ModuleFile(extractedModulePath);
}

/**
 * Reports whether the archive at `_diskPath` is a tarball this class can
 * extract. The decision is based purely on the file name suffix.
 *
 * @returns `true` when the path ends with `.tar.gz` or `.tar.xz`,
 *   `false` otherwise.
 */
private isSupportedTarball(): boolean {
  const tarballSuffixes = [".tar.gz", ".tar.xz"];
  return tarballSuffixes.some((suffix) => this._diskPath.endsWith(suffix));
}

private async extractReleaseTarball(extractDir: string): Promise<void> {
if (this._diskPath.endsWith(".tar.xz")) {
const reader = fs.createReadStream(this._diskPath);
const writer = tar.x({
cwd: extractDir
});
await decompressXz(reader, writer);
return;
}

await tar.x({
cwd: extractDir,
file: this._diskPath,
Expand Down
2 changes: 2 additions & 0 deletions src/infrastructure/xzdec/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/bazel-*
/MODULE.bazel.lock
33 changes: 33 additions & 0 deletions src/infrastructure/xzdec/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
load("@aspect_bazel_lib//lib:write_source_files.bzl", "write_source_file")
load(":wasm.bzl", "wasm_binary")

# Compiles the liblzma wrapper in xzdec.c. Tagged "manual", so wildcard target
# patterns skip it; it is consumed through `:xzdec_wasm` below.
cc_binary(
name = "xzdec",
srcs = ["xzdec.c"],
linkopts = [
# Link without the default startup files and libraries, then add libc
# back explicitly.
"-nostdlib",
"-lc",
# The module only exports library-style functions, so the linker is told
# not to require an entry point.
"-Wl,--no-entry",
],
tags = ["manual"],
deps = ["@xz//:lzma"],
)

# Builds `:xzdec` through the platform transition declared by the `wasm_binary`
# rule in wasm.bzl and exposes the result under the name xzdec.wasm.
wasm_binary(
name = "xzdec_wasm",
out = "xzdec.wasm",
lib = ":xzdec",
)

# Gzip-compresses xzdec.wasm at maximum compression. `-n` keeps the original
# file name and timestamp out of the gzip header so they cannot cause spurious
# differences in the output.
genrule(
    name = "xzdec_wasm_gz",
    srcs = [":xzdec_wasm"],
    outs = ["xzdec_wasm_gz/xzdec.wasm.gz"],
    # The single input is redirected into gzip directly. `-k` (keep input file)
    # is omitted because it has no effect when compressing from stdin.
    cmd = "gzip -9 -n < $< > $@",
)

# Copies the generated `:xzdec_wasm_gz` output to xzdec.wasm.gz in this
# package's source directory, so the built artifact can be checked in.
write_source_file(
name = "write_xzdec_wasm_gz_to_source_tree",
in_file = ":xzdec_wasm_gz",
out_file = "xzdec.wasm.gz",
)
45 changes: 45 additions & 0 deletions src/infrastructure/xzdec/MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Bazel module used to build the xzdec WebAssembly artifact in this directory.
module(name = "publish-to-bcr")

bazel_dep(name = "aspect_bazel_lib", version = "2.9.3")
bazel_dep(name = "platforms", version = "0.0.10")
bazel_dep(name = "toolchains_llvm", version = "1.2.0")
bazel_dep(name = "xz", version = "5.4.5.bcr.5")

# https://github.com/bazel-contrib/toolchains_llvm/pull/405
#
# FIXME: Remove when a new `toolchains_llvm` has been released.
git_override(
module_name = "toolchains_llvm",
commit = "bda1c9fbf232b682c30d039f8e4a5e3cf3025d0f",
remote = "https://github.com/bazel-contrib/toolchains_llvm",
)

# Configure the LLVM toolchain, including the pieces needed for wasm32.
llvm = use_extension("@toolchains_llvm//toolchain/extensions:llvm.bzl", "llvm")
llvm.toolchain(
# Maps the compiler-rt builtins archive from the `libclang_rt-wasm32-wasi`
# repository (declared at the bottom of this file) to a toolchain-relative path.
libclang_rt = {
"@libclang_rt-wasm32-wasi//:libclang_rt.builtins-wasm32.a": "wasm32-unknown-unknown/libclang_rt.builtins.a",
},
llvm_versions = {
# Pin to an older LLVM version due to a stray Homebrew dependency
# in the macOS build of v19.1.0.
#
# https://github.com/llvm/llvm-project/issues/110070
"": "18.1.8",
},
# NOTE(review): presumably selects the sysroot's libc as the standard
# library for wasm32 targets -- confirm against the toolchains_llvm docs.
stdlib = {"wasm32": "libc"},
)
# Use the wasm32-wasip2 sysroot package from the `wasi-sysroot` repository
# (declared at the bottom of this file) for wasm32 targets.
llvm.sysroot(
label = "@wasi-sysroot//sysroots/wasm32-wasip2",
targets = ["wasm32"],
)
use_repo(llvm, "llvm_toolchain")

register_toolchains("@llvm_toolchain//:all")

# Repository rules from wasm.bzl that download wasi-sdk release artifacts.
wasi_sysroot = use_repo_rule("//:wasm.bzl", "wasi_sysroot")

wasm32_libclang_rt = use_repo_rule("//:wasm.bzl", "wasm32_libclang_rt")

wasi_sysroot(name = "wasi-sysroot")

wasm32_libclang_rt(name = "libclang_rt-wasm32-wasi")
33 changes: 33 additions & 0 deletions src/infrastructure/xzdec/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# xz decompressor

This directory contains a WebAssembly module for decompressing an Xz file using
[liblzma], along with a JavaScript wrapper that adapts `xzdec.wasm` for use
with the Node.js [`node:stream`] library.

[liblzma]: https://github.com/tukaani-project/xz/tree/v5.4.5/src/liblzma
[`node:stream`]: https://nodejs.org/docs/latest-v18.x/api/stream.html

Files:
- `xzdec.c` is a thin wrapper around liblzma that exports functions with a
WebAssembly-style ABI. It compiles to `xzdec.wasm`.
- `xzdec.wasm.gz` is a gzip-compressed `xzdec.wasm`, to reduce the size impact
of checking generated build artifacts into Git.
- `xzdec.ts` exports the `decompress(r: stream.Readable, w: stream.Writable)`
function, which instantiates a WebAssembly module from `xzdec.wasm.gz` and
decompresses an Xz bitstream.

When building a new version of `xzdec.wasm.gz`, or verifying that the checked-in
artifact matches the expected output, Bazel should be run with `-c opt` so that
the compiled output is optimized.

```
$ cd src/infrastructure/xzdec
$ bazel build -c opt //:xzdec_wasm_gz
$ diff -s xzdec.wasm.gz bazel-bin/xzdec_wasm_gz/xzdec.wasm.gz
Files xzdec.wasm.gz and bazel-bin/xzdec_wasm_gz/xzdec.wasm.gz are identical
$
```

Note that variations in the gzip compression may cause spurious differences
between the checked-in `xzdec.wasm.gz` and a freshly built one -- in this case,
decompressing the two files and comparing `xzdec.wasm` directly gives a more
reliable comparison.
86 changes: 86 additions & 0 deletions src/infrastructure/xzdec/wasm.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# ABI directory names for which `_wasi_sysroot` creates a `sysroots/<abi>`
# package.
_WASM_ABIS = [
"wasm32-wasip2",
]

def _platform_transition(settings, attr):
    """Transition implementation that retargets the build at the rule's `_platform`.

    Args:
        settings: current build settings (unused; the transition declares no inputs).
        attr: attributes of the rule the transition is attached to.

    Returns:
        A dict setting the `platforms` command-line option to the string form
        of `attr._platform`.
    """
    target_platform = str(attr._platform)
    return {"//command_line_option:platforms": target_platform}

# Transition that reads no settings and overrides only `--platforms`.
platform_transition = transition(
    outputs = ["//command_line_option:platforms"],
    inputs = [],
    implementation = _platform_transition,
)

def _wasm_binary(ctx):
    """Rule implementation: expose the `lib` file under a `.wasm` output name.

    Args:
        ctx: the rule context.

    Returns:
        A DefaultInfo whose only file is the output, which is a symlink to the
        single file provided by `lib`.
    """

    # Use the explicit `out` when one was given; otherwise declare `<name>.wasm`.
    wasm_file = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".wasm")
    ctx.actions.symlink(target_file = ctx.file.lib, output = wasm_file)
    return DefaultInfo(files = depset([wasm_file]))

# Rule that builds `lib` under `platform_transition` and publishes the
# resulting single file as a `.wasm` output.
wasm_binary = rule(
implementation = _wasm_binary,
attrs = {
# The target to build for the wasm platform; must produce exactly one file.
"lib": attr.label(
allow_single_file = True,
cfg = platform_transition,
),
# Optional explicit output file; `_wasm_binary` falls back to
# `<name>.wasm` when it is not set.
"out": attr.output(),
# Platform that `_platform_transition` switches `--platforms` to.
"_platform": attr.label(
default = Label("@toolchains_llvm//platforms:wasm32"),
),
# Allowlist attribute accompanying the use of a Starlark transition.
"_allowlist_function_transition": attr.label(
default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
),
},
)

# BUILD file template for a `sysroots/<abi>` package: one public filegroup over
# the package's include/, lib/ and share/ trees. `{name}` is filled in by
# `_wasi_sysroot` with the repr() of the ABI name; `allow_empty` lets the glob
# match nothing.
_SYSROOT_BUILD = """
filegroup(
name = {name},
srcs = glob(["include/**/*", "lib/**/*", "share/**/*"], allow_empty=True),
visibility = ["//visibility:public"],
)
"""

def _wasi_sysroot(ctx):
    """Repository rule implementation: fetch the WASI sysroot and split it per ABI.

    Downloads and extracts the wasi-sysroot 24.0 release archive, then for every
    ABI in `_WASM_ABIS` creates a `sysroots/<abi>` package (BUILD file rendered
    from `_SYSROOT_BUILD`) and moves the archive's `include/<abi>`, `lib/<abi>`
    and `share/<abi>` directories into it.

    A directory that is absent from the archive is skipped, which the
    `allow_empty` glob in `_SYSROOT_BUILD` tolerates. A move that fails for an
    existing directory aborts repository setup instead of being silently ignored.

    Args:
        ctx: the repository context.
    """
    ctx.download_and_extract(
        integrity = "sha256-NRcvfSeZSFsVpGsdh/UKWF2RXsZiCA8AXZkVOlCIjwg=",
        stripPrefix = "wasi-sysroot-24.0",
        url = ["https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sysroot-24.0.tar.gz"],
    )

    ctx.file("BUILD.bazel", "")
    ctx.file("sysroots/BUILD.bazel", "")
    for abi in _WASM_ABIS:
        # Written before the moves so that `sysroots/<abi>` exists as a
        # destination directory.
        ctx.file("sysroots/%s/BUILD.bazel" % (abi,), _SYSROOT_BUILD.format(
            name = repr(abi),
        ))
        for subdir in ["include", "lib", "share"]:
            src = "%s/%s" % (subdir, abi)
            if not ctx.path(src).exists:
                continue
            dst = "sysroots/%s/%s" % (abi, subdir)
            result = ctx.execute(["mv", src, dst])
            if result.return_code != 0:
                fail("failed to move %s to %s: %s" % (src, dst, result.stderr))

wasi_sysroot = repository_rule(
    implementation = _wasi_sysroot,
)

def _wasm32_libclang_rt(ctx):
    """Repository rule implementation: fetch the wasm32 compiler-rt builtins archive.

    Writes a BUILD file that exports `libclang_rt.builtins-wasm32.a` (directly
    and through a public filegroup), then downloads and extracts the wasi-sdk 24
    `libclang_rt.builtins-wasm32-wasi` release archive into the repository root.

    Args:
        ctx: the repository context.
    """
    build_file_content = """
exports_files(["libclang_rt.builtins-wasm32.a"])
filegroup(
name = "libclang_rt-wasm32-wasi",
srcs = ["libclang_rt.builtins-wasm32.a"],
visibility = ["//visibility:public"],
)
"""
    ctx.file("BUILD.bazel", build_file_content)

    release_urls = ["https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/libclang_rt.builtins-wasm32-wasi-24.0.tar.gz"]
    ctx.download_and_extract(
        url = release_urls,
        stripPrefix = "libclang_rt.builtins-wasm32-wasi-24.0",
        integrity = "sha256-fjPA33WLkEabHePKFY4tCn9xk01YhFJbpqNy3gs7Dsc=",
    )

wasm32_libclang_rt = repository_rule(
    implementation = _wasm32_libclang_rt,
)
95 changes: 95 additions & 0 deletions src/infrastructure/xzdec/xzdec.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#include <stdint.h>
#include <stdlib.h>

#include <lzma.h>

// Return-code type of the exported decoder functions: liblzma `lzma_ret`
// values passed through as 32-bit unsigned integers.
typedef uint32_t xzdec_lzma_ret;

// Decoder state. Allocated by xzdec_new_stream_decoder() and released by
// xzdec_drop().
struct Xzdec {
// liblzma stream state, including the current input/output buffer cursors.
lzma_stream stream;
};

/*
 * Allocate `len` bytes with malloc() and return the buffer, or NULL if the
 * allocation fails. Exported under the name "xzdec_allocate".
 */
__attribute__((export_name("xzdec_allocate")))
uint8_t *xzdec_allocate(uint32_t len) {
    void *block = malloc(len);
    return block;
}

/*
 * Release a buffer with free(). Passing NULL is a no-op.
 * Exported under the name "xzdec_deallocate".
 */
__attribute__((export_name("xzdec_deallocate")))
void xzdec_deallocate(uint8_t *ptr) {
    if (ptr != NULL) {
        free(ptr);
    }
}

__attribute__((export_name("xzdec_new_stream_decoder")))
xzdec_lzma_ret xzdec_new_stream_decoder(
uint32_t memlimit,
uint32_t flags,
struct Xzdec **xzdec_ptr
) {
lzma_stream stream = LZMA_STREAM_INIT;
lzma_ret rc = lzma_stream_decoder(&stream, memlimit, flags);
if (rc != LZMA_OK) {
return rc;
}
*xzdec_ptr = malloc(sizeof(struct Xzdec));
(*xzdec_ptr)->stream = stream;
return LZMA_OK;
}

__attribute__((export_name("xzdec_drop")))
void xzdec_drop(struct Xzdec *xzdec) {
lzma_end(&(xzdec->stream));
free(xzdec);
}

/*
 * Report whether the decoder has no pending input bytes.
 * Returns 1 when the stream's `avail_in` is zero, 0 otherwise.
 */
__attribute__((export_name("xzdec_input_empty")))
uint32_t xzdec_input_empty(struct Xzdec *xzdec) {
    return (xzdec->stream.avail_in == 0) ? 1 : 0;
}

__attribute__((export_name("xzdec_set_input")))
void xzdec_set_input(
struct Xzdec *xzdec,
const uint8_t *input_buf,
uint32_t input_buf_len
) {
xzdec->stream.next_in = input_buf;
xzdec->stream.avail_in = input_buf_len;
}

__attribute__((export_name("xzdec_next_output")))
xzdec_lzma_ret xzdec_next_output(
struct Xzdec *xzdec,
uint8_t *output_buf,
uint32_t output_buf_cap,
uint32_t *output_buf_len
) {
xzdec->stream.next_out = output_buf;
xzdec->stream.avail_out = output_buf_cap;
lzma_ret rc = lzma_code(&(xzdec->stream), LZMA_RUN);
*output_buf_len = output_buf_cap - xzdec->stream.avail_out;
return rc;
}

__attribute__((export_name("xzdec_finish")))
xzdec_lzma_ret xzdec_finish(
struct Xzdec *xzdec,
uint8_t *output_buf,
uint32_t output_buf_cap,
uint32_t *output_buf_len
) {
xzdec->stream.next_out = output_buf;
xzdec->stream.avail_out = output_buf_cap;
lzma_ret rc = lzma_code(&(xzdec->stream), LZMA_FINISH);
*output_buf_len = output_buf_cap - xzdec->stream.avail_out;
return rc;
}

// Prevent Clang from wrapping every inserted function and injecting calls
// to `__wasm_call_dtors()`.
//
// Calls `__wasm_call_ctors()` and does nothing else.
// NOTE(review): presumably the explicit reference to `__wasm_call_ctors` is
// what stops the toolchain from generating per-export wrappers, and the host
// may be expected to call `_initialize` once after instantiation -- confirm
// against the wasm-ld documentation and xzdec.ts.
void _initialize() {
// Declared locally; the definition is not in this file (presumably supplied
// by the linker).
void __wasm_call_ctors();
__wasm_call_ctors();
}
Loading

0 comments on commit bf932c6

Please sign in to comment.