Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to encode raw binary strings #6

Merged
merged 4 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/Encoder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { utf8Count, utf8Encode } from "./utils/utf8";
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
import { setInt64, setUint64 } from "./utils/int";
import { ensureUint8Array, compareUint8Arrays } from "./utils/typedArrays";
import { ensureUint8Array, compareUint8Arrays, RawBinaryString } from "./utils/typedArrays";
import type { ExtData } from "./ExtData";
import type { ContextOf } from "./context";

Expand Down Expand Up @@ -326,6 +326,8 @@ export class Encoder<ContextType = undefined> {
this.encodeArray(object, depth);
} else if (ArrayBuffer.isView(object)) {
this.encodeBinary(object);
} else if (object instanceof RawBinaryString) {
this.encodeBinaryAsString(object);
} else if (typeof object === "bigint") {
// this is here instead of in doEncode so that we can try encoding with an extension first,
// otherwise we would break existing extensions for bigints
Expand Down Expand Up @@ -361,6 +363,13 @@ export class Encoder<ContextType = undefined> {
this.writeU8a(bytes);
}

/**
 * Emit the bytes wrapped by a RawBinaryString using a string header rather than a binary one.
 *
 * The header is sized from the wrapped view's `byteLength`; the payload is then written
 * as-is after being normalized to a Uint8Array, with no UTF-8 validation or re-encoding
 * performed in this method.
 *
 * @param binaryString - wrapper whose `rawBinaryValue` holds the bytes to write
 */
private encodeBinaryAsString(binaryString: RawBinaryString) {
  const { rawBinaryValue } = binaryString;
  // Header first, so the declared length precedes the payload in the output.
  this.writeStringHeader(rawBinaryValue.byteLength);
  this.writeU8a(ensureUint8Array(rawBinaryValue));
}

private encodeArray(object: Array<unknown>, depth: number) {
const size = object.length;
if (size < 16) {
Expand Down Expand Up @@ -464,6 +473,8 @@ export class Encoder<ContextType = undefined> {
this.encodeBigInt(key);
} else if (ArrayBuffer.isView(key)) {
this.encodeBinary(key);
} else if (key instanceof RawBinaryString) {
this.encodeBinaryAsString(key);
} else {
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
}
Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import { Encoder } from "./Encoder";
export { Encoder };
import type { EncoderOptions } from "./Encoder";
export type { EncoderOptions };
import { RawBinaryString } from "./utils/typedArrays";
export { RawBinaryString };

// Utilities for Extension Types:

Expand Down
19 changes: 19 additions & 0 deletions src/utils/typedArrays.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,22 @@ export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
}
return a.length - b.length;
}

/**
 * Wrapper marking a binary value that must be encoded as though it were a string.
 *
 * Think of it as a string whose UTF-8 encoding step has already happened, leaving only
 * the resulting bytes. It exists for values that have to travel as strings even though
 * they contain invalid UTF-8 sequences. Encoding such data as binary is the better
 * choice whenever possible; use this wrapper only when compatibility with a non-ideal
 * system leaves no alternative.
 */
export class RawBinaryString {
  /** The wrapped bytes, stored exactly as handed to the constructor (no copy is made). */
  public readonly rawBinaryValue: ArrayBufferView;

  /**
   * Wrap an ArrayBufferView in a new RawBinaryString.
   *
   * @param rawBinaryValue - the view holding the raw string bytes
   * @throws TypeError if `rawBinaryValue` is not an ArrayBufferView at runtime
   */
  public constructor(rawBinaryValue: ArrayBufferView) {
    // Runtime guard for untyped (plain JS) callers that bypass the static type.
    const isView = ArrayBuffer.isView(rawBinaryValue);
    if (!isView) {
      throw new TypeError("RawBinaryString: rawBinaryValue must be an ArrayBufferView");
    }
    this.rawBinaryValue = rawBinaryValue;
  }
}
49 changes: 39 additions & 10 deletions test/decode-raw-strings.test.ts → test/raw-strings.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,44 @@
import assert from "assert";
import { encode, decode } from "../src";
import { encode, decode, RawBinaryString } from "../src";
import type { DecoderOptions } from "../src";

// Shared fixture: 32 bytes that are not valid UTF-8 (the second byte, 180 / 0xB4, is a
// continuation byte with no preceding lead byte). Used by the encode and decode suites below.
const invalidUtf8String = Uint8Array.from([
61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176, 184,
221, 66, 188, 171, 36, 135, 121,
]);

// Encoder-side coverage for RawBinaryString: the wrapped bytes must come out framed as a
// string, both as a top-level value and as a map key, and must pass through untouched even
// when they are not valid UTF-8.
describe("encode with RawBinaryString", () => {
  it("encodes a RawBinaryString value as a string", () => {
    // 0x66 0x6f 0x6f are the bytes of "foo", so both encodings must be identical.
    const fooBytes = Uint8Array.of(0x66, 0x6f, 0x6f);
    assert.deepStrictEqual(encode(new RawBinaryString(fooBytes)), encode("foo"));
  });

  it("encodes an invalid UTF-8 RawBinaryString value as a string", () => {
    // Expected output: a two-byte prefix (217, 32) followed by the fixture bytes unchanged.
    const wanted = Uint8Array.from([
      217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50,
      176, 184, 221, 66, 188, 171, 36, 135, 121,
    ]);
    const encoded = encode(new RawBinaryString(invalidUtf8String));
    assert.deepStrictEqual(encoded, wanted);
  });

  it("encodes a RawBinaryString map key as a string", () => {
    // 0x6b 0x65 0x79 are the bytes of "key"; the result must match a plain object keyed by "key".
    const keyedMap = new Map<RawBinaryString, string>();
    keyedMap.set(new RawBinaryString(Uint8Array.of(0x6b, 0x65, 0x79)), "foo");
    assert.deepStrictEqual(encode(keyedMap), encode({ key: "foo" }));
  });

  it("encodes an invalid UTF-8 RawBinaryString map key as a string", () => {
    // Expected output: 129, then the string-framed fixture as the key, then 163 + "abc" as the value.
    const wanted = Uint8Array.from([
      129, 217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247,
      19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121, 163, 97, 98, 99,
    ]);
    const keyedMap = new Map<RawBinaryString, string>();
    keyedMap.set(new RawBinaryString(invalidUtf8String), "abc");
    assert.deepStrictEqual(encode(keyedMap), wanted);
  });
});

describe("decode with rawBinaryStringValues specified", () => {
const options = { rawBinaryStringValues: true } satisfies DecoderOptions;

Expand All @@ -12,12 +49,8 @@ describe("decode with rawBinaryStringValues specified", () => {
});

it("decodes invalid UTF-8 string values as binary", () => {
const invalidUtf8String = Uint8Array.from([
61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176,
184, 221, 66, 188, 171, 36, 135, 121,
]);
const encoded = Uint8Array.from([
196, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50,
217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50,
jasonpaulos marked this conversation as resolved.
Show resolved Hide resolved
176, 184, 221, 66, 188, 171, 36, 135, 121,
]);

Expand Down Expand Up @@ -64,10 +97,6 @@ describe("decode with rawBinaryStringKeys specified", () => {
});

it("decodes invalid UTF-8 string keys as binary", () => {
const invalidUtf8String = Uint8Array.from([
61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176,
184, 221, 66, 188, 171, 36, 135, 121,
]);
const encodedMap = Uint8Array.from([
129, 217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247,
19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121, 163, 97, 98, 99,
Expand Down
Loading