Skip to content

Commit

Permalink
Add JS Map support for encoding & decoding (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonpaulos authored Mar 12, 2024
1 parent 0d6e172 commit b92d75b
Show file tree
Hide file tree
Showing 9 changed files with 684 additions and 63 deletions.
107 changes: 87 additions & 20 deletions src/Decoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,50 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
*
* This is useful if the strings may contain invalid UTF-8 sequences.
*
* Note that this option only applies to string values, not map keys. Additionally, when
* enabled, raw string length is limited by the maxBinLength option.
* When enabled, raw string length is limited by the maxBinLength option.
*
* Note that this option only applies to string values, not map keys. See `rawBinaryStringKeys`
* for map keys.
*/
rawBinaryStringValues: boolean;

/**
* By default, map keys will be decoded as UTF-8 strings. However, if this option is true, map
* keys will be returned as Uint8Arrays without additional decoding.
*
* Requires `useMap` to be true, since plain objects do not support binary keys.
*
* When enabled, raw string length is limited by the maxBinLength option.
*
* Note that this option only applies to map keys, not string values. See `rawBinaryStringValues`
* for string values.
*/
rawBinaryStringKeys: boolean;

/**
* If true, the decoder will use the Map object to store map values. If false, it will use plain
* objects. Defaults to false.
*
* Besides the type of container, the main difference is that Map objects support a wider range
* of key types. Plain objects only support string keys (though you can enable
* `supportObjectNumberKeys` to coerce number keys to strings), while Map objects support
* strings, numbers, bigints, and Uint8Arrays.
*/
useMap: boolean;

/**
* If true, the decoder will support decoding numbers as map keys on plain objects. Defaults to
* false.
*
* Note that number keys are converted to strings when stored on a plain object, so distinct keys
* may collide (e.g. the number 1 and the string "1"), and re-encoding the decoded object will not
* reproduce the original representation.
*
* This option is ignored if `useMap` is true.
*
* This is useful for backwards compatibility before `useMap` was introduced. Consider instead
* using `useMap` for new code.
*/
useRawBinaryStrings: boolean;
supportObjectNumberKeys: boolean;

/**
* Maximum string length.
Expand Down Expand Up @@ -82,18 +122,22 @@ const STATE_ARRAY = "array";
const STATE_MAP_KEY = "map_key";
const STATE_MAP_VALUE = "map_value";

type MapKeyType = string | number;
type MapKeyType = string | number | bigint | Uint8Array;

const isValidMapKeyType = (key: unknown): key is MapKeyType => {
return typeof key === "string" || typeof key === "number";
};
/**
 * Type guard deciding whether a decoded value may be used as a map key.
 *
 * @param key - The decoded value to check.
 * @param useMap - Whether maps are decoded into `Map` instances. If true, strings, numbers,
 * bigints and Uint8Arrays are all accepted.
 * @param supportObjectNumberKeys - Only consulted when `useMap` is false: whether numbers are
 * accepted in addition to strings.
 * @returns true if `key` is an acceptable key for the selected container.
 */
function isValidMapKeyType(key: unknown, useMap: boolean, supportObjectNumberKeys: boolean): key is MapKeyType {
  const keyType = typeof key;

  // Strings are valid keys for both containers.
  if (keyType === "string") {
    return true;
  }

  // Numbers are always fine for a Map, and opt-in for plain objects.
  if (keyType === "number") {
    return useMap || supportObjectNumberKeys;
  }

  // Everything else is only ever valid as a Map key.
  if (!useMap) {
    return false;
  }
  return keyType === "bigint" || key instanceof Uint8Array;
}

type StackMapState = {
type: typeof STATE_MAP_KEY | typeof STATE_MAP_VALUE;
size: number;
key: MapKeyType | null;
readCount: number;
map: Record<string, unknown>;
map: Record<string, unknown> | Map<MapKeyType, unknown>;
};

type StackArrayState = {
Expand All @@ -107,6 +151,8 @@ class StackPool {
private readonly stack: Array<StackState> = [];
private stackHeadPosition = -1;

constructor(private readonly useMap: boolean) {}

public get length(): number {
return this.stackHeadPosition + 1;
}
Expand All @@ -130,7 +176,7 @@ class StackPool {
state.type = STATE_MAP_KEY;
state.readCount = 0;
state.size = size;
state.map = {};
state.map = this.useMap ? new Map() : {};
}

private getUninitializedStateFromPool() {
Expand Down Expand Up @@ -213,7 +259,10 @@ export class Decoder<ContextType = undefined> {
private readonly extensionCodec: ExtensionCodecType<ContextType>;
private readonly context: ContextType;
private readonly intMode: IntMode;
private readonly useRawBinaryStrings: boolean;
private readonly rawBinaryStringValues: boolean;
private readonly rawBinaryStringKeys: boolean;
private readonly useMap: boolean;
private readonly supportObjectNumberKeys: boolean;
private readonly maxStrLength: number;
private readonly maxBinLength: number;
private readonly maxArrayLength: number;
Expand All @@ -227,20 +276,29 @@ export class Decoder<ContextType = undefined> {
private view = EMPTY_VIEW;
private bytes = EMPTY_BYTES;
private headByte = HEAD_BYTE_REQUIRED;
private readonly stack = new StackPool();
private readonly stack: StackPool;

public constructor(options?: DecoderOptions<ContextType>) {
this.extensionCodec = options?.extensionCodec ?? (ExtensionCodec.defaultCodec as ExtensionCodecType<ContextType>);
this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because EncoderOptions has no context property when ContextType is undefined

this.intMode = options?.intMode ?? (options?.useBigInt64 ? IntMode.AS_ENCODED : IntMode.UNSAFE_NUMBER);
this.useRawBinaryStrings = options?.useRawBinaryStrings ?? false;
this.rawBinaryStringValues = options?.rawBinaryStringValues ?? false;
this.rawBinaryStringKeys = options?.rawBinaryStringKeys ?? false;
this.useMap = options?.useMap ?? false;
this.supportObjectNumberKeys = options?.supportObjectNumberKeys ?? false;
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
this.maxBinLength = options?.maxBinLength ?? UINT32_MAX;
this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX;
this.maxMapLength = options?.maxMapLength ?? UINT32_MAX;
this.maxExtLength = options?.maxExtLength ?? UINT32_MAX;
this.keyDecoder = options?.keyDecoder !== undefined ? options.keyDecoder : sharedCachedKeyDecoder;

if (this.rawBinaryStringKeys && !this.useMap) {
throw new Error("rawBinaryStringKeys is only supported when useMap is true");
}

this.stack = new StackPool(this.useMap);
}

private reinitializeState() {
Expand Down Expand Up @@ -404,7 +462,7 @@ export class Decoder<ContextType = undefined> {
this.complete();
continue DECODE;
} else {
object = {};
object = this.useMap ? new Map() : {};
}
} else if (headByte < 0xa0) {
// fixarray (1001 xxxx) 0x90 - 0x9f
Expand Down Expand Up @@ -571,10 +629,15 @@ export class Decoder<ContextType = undefined> {
continue DECODE;
}
} else if (state.type === STATE_MAP_KEY) {
if (!isValidMapKeyType(object)) {
throw new DecodeError("The type of key must be string or number but " + typeof object);
if (!isValidMapKeyType(object, this.useMap, this.supportObjectNumberKeys)) {
const acceptableTypes = this.useMap
? "string, number, bigint, or Uint8Array"
: this.supportObjectNumberKeys
? "string or number"
: "string";
throw new DecodeError(`The type of key must be ${acceptableTypes} but got ${typeof object}`);
}
if (object === "__proto__") {
if (!this.useMap && object === "__proto__") {
throw new DecodeError("The key __proto__ is not allowed");
}

Expand All @@ -584,7 +647,11 @@ export class Decoder<ContextType = undefined> {
} else {
// it must be `state.type === State.MAP_VALUE` here

state.map[state.key!] = object;
if (this.useMap) {
(state.map as Map<MapKeyType, unknown>).set(state.key!, object);
} else {
(state.map as Record<string, unknown>)[state.key as string] = object;
}
state.readCount++;

if (state.readCount === state.size) {
Expand Down Expand Up @@ -650,10 +717,10 @@ export class Decoder<ContextType = undefined> {
}

private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
if (!this.useRawBinaryStrings || this.stateIsMapKey()) {
return this.decodeUtf8String(byteLength, headerOffset);
if (this.stateIsMapKey() ? this.rawBinaryStringKeys : this.rawBinaryStringValues) {
return this.decodeBinary(byteLength, headerOffset);
}
return this.decodeBinary(byteLength, headerOffset);
return this.decodeUtf8String(byteLength, headerOffset);
}

private decodeUtf8String(byteLength: number, headerOffset: number): string {
Expand Down
76 changes: 66 additions & 10 deletions src/Encoder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { utf8Count, utf8Encode } from "./utils/utf8";
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
import { setInt64, setUint64 } from "./utils/int";
import { ensureUint8Array } from "./utils/typedArrays";
import { ensureUint8Array, compareUint8Arrays } from "./utils/typedArrays";
import type { ExtData } from "./ExtData";
import type { ContextOf } from "./context";

Expand Down Expand Up @@ -41,6 +41,15 @@ export type EncoderOptions<ContextType = undefined> = Partial<
* binary is canonical and thus comparable to another encoded binary.
*
* Defaults to `false`. If enabled, it spends more time in encoding objects.
*
* If enabled, the encoder throws an error when a map has NaN as one of its keys, since NaN
* cannot be ordered relative to other numbers.
*
* If enabled and the keys of a map include multiple different types, each type will be sorted
* separately, and the order of the types will be as follows:
* 1. Numbers (including bigints)
* 2. Strings
* 3. Binary data
*/
sortKeys: boolean;

Expand Down Expand Up @@ -321,8 +330,10 @@ export class Encoder<ContextType = undefined> {
// this is here instead of in doEncode so that we can try encoding with an extension first,
// otherwise we would break existing extensions for bigints
this.encodeBigInt(object);
} else if (object instanceof Map) {
this.encodeMap(object, depth);
} else if (typeof object === "object") {
this.encodeMap(object as Record<string, unknown>, depth);
this.encodeMapObject(object as Record<string, unknown>, depth);
} else {
// symbol, function and other special object come here unless extensionCodec handles them.
throw new Error(`Unrecognized object: ${Object.prototype.toString.apply(object)}`);
Expand Down Expand Up @@ -371,25 +382,60 @@ export class Encoder<ContextType = undefined> {
}
}

private countWithoutUndefined(object: Record<string, unknown>, keys: ReadonlyArray<string>): number {
private countWithoutUndefined(map: Map<unknown, unknown>, keys: ReadonlyArray<unknown>): number {
let count = 0;

for (const key of keys) {
if (object[key] !== undefined) {
if (map.get(key) !== undefined) {
count++;
}
}

return count;
}

private encodeMap(object: Record<string, unknown>, depth: number) {
const keys = Object.keys(object);
/**
 * Returns the given map keys in a deterministic order: numeric keys (numbers and bigints,
 * ascending), then string keys (default `Array.prototype.sort` order), then binary keys
 * (ordered by `compareUint8Arrays` over their bytes).
 *
 * The returned array contains the very same key objects that were passed in. This matters
 * because the caller looks each key up again with `Map.prototype.get`, which compares object
 * keys by identity; returning a converted copy of a binary key would make that lookup miss.
 *
 * @param keys - The keys of the map being encoded.
 * @returns A new array with the keys grouped by type and sorted within each group.
 * @throws Error if a key is NaN (it cannot be ordered) or is not a number, bigint, string or
 * ArrayBufferView.
 */
private sortMapKeys(keys: Array<unknown>): Array<unknown> {
  const numericKeys: Array<number | bigint> = [];
  const stringKeys: Array<string> = [];
  // Pair every binary key with its byte view so that sorting uses the bytes while the original
  // key object is what ends up in the result.
  // NOTE(review): presumably ensureUint8Array returns a new object for views that are not
  // already Uint8Arrays - confirm against its implementation.
  const binaryKeys: Array<{ key: ArrayBufferView; bytes: Uint8Array }> = [];

  for (const key of keys) {
    if (typeof key === "number") {
      if (Number.isNaN(key)) {
        throw new Error("Cannot sort map keys with NaN value");
      }
      numericKeys.push(key);
    } else if (typeof key === "bigint") {
      numericKeys.push(key);
    } else if (typeof key === "string") {
      stringKeys.push(key);
    } else if (ArrayBuffer.isView(key)) {
      binaryKeys.push({ key, bytes: ensureUint8Array(key) });
    } else {
      throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
    }
  }

  numericKeys.sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); // Avoid using === to compare numbers and bigints
  stringKeys.sort();
  binaryKeys.sort((a, b) => compareUint8Arrays(a.bytes, b.bytes));

  // At the moment this arbitrarily orders the keys as numeric, string, binary
  return ([] as Array<unknown>).concat(
    numericKeys,
    stringKeys,
    binaryKeys.map((entry) => entry.key),
  );
}

/**
 * Encodes a plain object as a map by copying its `Object.entries` into a `Map` and delegating
 * to `encodeMap`.
 *
 * @param object - The plain object to encode.
 * @param depth - Passed through unchanged to `encodeMap`.
 */
private encodeMapObject(object: Record<string, unknown>, depth: number) {
  const entries = Object.entries(object);
  const asMap = new Map<string, unknown>(entries);
  this.encodeMap(asMap, depth);
}

private encodeMap(map: Map<unknown, unknown>, depth: number) {
let keys = Array.from(map.keys());
if (this.sortKeys) {
keys.sort();
keys = this.sortMapKeys(keys);
}

const size = this.ignoreUndefined ? this.countWithoutUndefined(object, keys) : keys.length;
// Map keys may encode to the same underlying value. For example, the number 3 and the bigint 3.
// This is also possible with ArrayBufferViews. We may want to introduce a new encoding option
// which checks for duplicate keys in this sense and throws an error if they are found.

const size = this.ignoreUndefined ? this.countWithoutUndefined(map, keys) : keys.length;

if (size < 16) {
// fixmap
Expand All @@ -407,10 +453,20 @@ export class Encoder<ContextType = undefined> {
}

for (const key of keys) {
const value = object[key];
const value = map.get(key);

if (!(this.ignoreUndefined && value === undefined)) {
this.encodeString(key);
if (typeof key === "string") {
this.encodeString(key);
} else if (typeof key === "number") {
this.encodeNumber(key);
} else if (typeof key === "bigint") {
this.encodeBigInt(key);
} else if (ArrayBuffer.isView(key)) {
this.encodeBinary(key);
} else {
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
}
this.doEncode(value, depth + 1);
}
}
Expand Down
11 changes: 11 additions & 0 deletions src/utils/typedArrays.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,14 @@ export function createDataView(buffer: ArrayLike<number> | ArrayBufferView | Arr
const bufferView = ensureUint8Array(buffer);
return new DataView(bufferView.buffer, bufferView.byteOffset, bufferView.byteLength);
}

/**
 * Lexicographically compares two byte arrays.
 *
 * Bytes are compared pairwise from the start; the first differing pair decides the result. If
 * one array is a prefix of the other, the shorter array orders first.
 *
 * @param a - The first byte array.
 * @param b - The second byte array.
 * @returns A negative number if `a` orders before `b`, a positive number if it orders after,
 * and 0 if both have the same length and contents. The magnitude is the difference of the first
 * differing bytes, or of the lengths.
 */
export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
  const sharedLength = a.length < b.length ? a.length : b.length;

  let index = 0;
  while (index < sharedLength) {
    const delta = a[index]! - b[index]!;
    if (delta !== 0) {
      return delta;
    }
    index += 1;
  }

  // Every byte in the shared prefix matched, so the lengths decide.
  return a.length - b.length;
}
30 changes: 15 additions & 15 deletions test/codec-bigint.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,24 +253,24 @@ describe("codec BigInt", () => {
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
});

it("encodes and decodes 100n", () => {
const value = BigInt(100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
it("encodes and decodes 100n", () => {
const value = BigInt(100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});

it("encodes and decodes -100n", () => {
const value = BigInt(-100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
it("encodes and decodes -100n", () => {
const value = BigInt(-100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});

it("encodes and decodes MAX_SAFE_INTEGER+1", () => {
const value = BigInt(Number.MAX_SAFE_INTEGER) + BigInt(1);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
it("encodes and decodes MAX_SAFE_INTEGER+1", () => {
const value = BigInt(Number.MAX_SAFE_INTEGER) + BigInt(1);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
});

context("native", () => {
Expand Down
Loading

0 comments on commit b92d75b

Please sign in to comment.