Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename unicode helpers #250

Merged
merged 2 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { strByteLen, toUTF8 } from "../../misc";
import { bytesToString, strUTF8Len } from "../../misc";
import { ASTNode } from "../ast_node";
import { ASTContext, ASTNodePostprocessor, FileMap } from "../ast_reader";
import { RawComment, parseComments } from "../comments";
Expand Down Expand Up @@ -31,7 +31,7 @@ export class StructuredDocumentationReconstructor {
source: Uint8Array
): StructuredDocumentation | undefined {
const [from, to, sourceIndex] = coords;
const fragment = toUTF8(source.slice(from, to));
const fragment = bytesToString(source.slice(from, to));

const parsedCommentsSoup = parseComments(fragment);

Expand Down Expand Up @@ -66,9 +66,9 @@ export class StructuredDocumentationReconstructor {
return undefined;
}

const byteOffsetFromFragment = strByteLen(fragment.slice(0, lastComment.loc.start));
const byteOffsetFromFragment = strUTF8Len(fragment.slice(0, lastComment.loc.start));
const offset = from + byteOffsetFromFragment;
const length = strByteLen(lastComment.text);
const length = strUTF8Len(lastComment.text);
const src = `${offset}:${length}:${sourceIndex}`;

return new StructuredDocumentation(0, src, lastComment.internalText.trim());
Expand Down
4 changes: 2 additions & 2 deletions src/ast/writing/writer.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { strByteLen } from "../../misc";
import { strUTF8Len } from "../../misc";
import { ASTNode, ASTNodeConstructor } from "../ast_node";
import { YulNode } from "../implementation/statement/inline_assembly";
import { SourceFormatter } from "./formatter";
Expand Down Expand Up @@ -117,7 +117,7 @@ export class ASTWriter {
for (const element of current) {
if (typeof element === "string") {
source += element;
size += strByteLen(element);
size += strUTF8Len(element);
} else {
const [node, nodeDesc] = element;
const start = size;
Expand Down
4 changes: 2 additions & 2 deletions src/bin/compile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
ASTNodeFormatter,
ASTReader,
ASTWriter,
bytesToString,
CACHE_DIR,
CompilationOutput,
CompileFailedError,
Expand All @@ -32,7 +33,6 @@ import {
PrettyFormatter,
SourceUnit,
StateVariableVisibility,
toUTF8,
VariableDeclaration,
XPath
} from "..";
Expand Down Expand Up @@ -326,7 +326,7 @@ function error(message: string): never {
data.sources[key] = {};
}

data.sources[key].source = toUTF8(value);
data.sources[key].source = bytesToString(value);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/compile/compiler_selection.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { toUTF8 } from "../misc";
import { bytesToString } from "../misc";
import { CompilerSeries, CompilerVersions } from "./constants";
import { extractSpecifiersFromSource, getCompilerVersionsBySpecifiers } from "./version";

Expand Down Expand Up @@ -80,7 +80,7 @@ export class VersionDetectionStrategy implements CompilerVersionSelectionStrateg
fallback: CompilerVersionSelectionStrategy,
descending = true
) {
this.sources = sources.map(toUTF8);
this.sources = sources.map(bytesToString);
this.fallback = fallback;
this.descending = descending;
}
Expand Down
4 changes: 2 additions & 2 deletions src/compile/inference/imports.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fse from "fs-extra";
import { dirname, normalize } from "path";
import { CompileInferenceError, ImportResolver, Remapping } from "..";
import { FileMap, assert, toUTF8 } from "../..";
import { FileMap, assert, bytesToString } from "../..";
import {
AnyFileLevelNode,
FileLevelNodeKind,
Expand Down Expand Up @@ -161,7 +161,7 @@ export async function findAllFiles(
let flds: AnyFileLevelNode[];

try {
flds = parseFileLevelDefinitions(toUTF8(content));
flds = parseFileLevelDefinitions(bytesToString(content));
} catch (e: any) {
if (e instanceof PeggySyntaxError) {
const start = e.location.start.offset;
Expand Down
4 changes: 2 additions & 2 deletions src/compile/input.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { FileMap } from "../ast";
import { toUTF8 } from "../misc";
import { bytesToString } from "../misc";
import { CompilationOutput } from "./constants";

export interface PartialSolcInput {
Expand Down Expand Up @@ -81,7 +81,7 @@ export function createCompilerInput(
partialInp.sources = {};

for (const [fileName, content] of files.entries()) {
partialInp.sources[fileName] = { content: toUTF8(content) };
partialInp.sources[fileName] = { content: bytesToString(content) };
}

const input = partialInp as SolcInput;
Expand Down
8 changes: 4 additions & 4 deletions src/compile/utils.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fse from "fs-extra";
import path from "path";
import { FileSystemResolver, getCompilerForVersion, LocalNpmResolver } from ".";
import { assert, fromUTF8 } from "../misc";
import { assert, stringToBytes } from "../misc";
import {
CompilerVersionSelectionStrategy,
LatestVersionInEachSeriesStrategy,
Expand Down Expand Up @@ -114,7 +114,7 @@ export function parsePathRemapping(remapping: string[]): Remapping[] {
function fillFilesFromSources(files: FileMap, sources: { [fileName: string]: any }): void {
for (const [fileName, section] of Object.entries(sources)) {
if (section && typeof section.source === "string") {
files.set(fileName, fromUTF8(section.source));
files.set(fileName, stringToBytes(section.source));
}
}
}
Expand Down Expand Up @@ -209,7 +209,7 @@ export async function compileSourceString(
const resolvers = [fsResolver, npmResolver];

const parsedRemapping = parsePathRemapping(remapping);
const files = new Map([[fileName, fromUTF8(sourceCode)]]);
const files = new Map([[fileName, stringToBytes(sourceCode)]]);
const resolvedFileNames = new Map([[fileName, fileName]]);

await findAllFiles(files, resolvedFileNames, parsedRemapping, resolvers);
Expand Down Expand Up @@ -386,7 +386,7 @@ export async function compileJsonData(

if (consistentlyContainsOneOf(sources, "source")) {
for (const [fileName, fileData] of Object.entries<{ source: string }>(sources)) {
files.set(fileName, fromUTF8(fileData.source));
files.set(fileName, stringToBytes(fileData.source));
}

const compilerVersionStrategy = getCompilerVersionStrategy([...files.values()], version);
Expand Down
66 changes: 58 additions & 8 deletions src/misc/unicode.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,64 @@
const decoder = new TextDecoder();
const encoder = new TextEncoder();
// Shared encoder instance; `TextEncoder` always produces UTF-8.
const utf8Enc = new TextEncoder();
// Shared decoder instance; constructed without a label, so it decodes UTF-8.
const utf8Dec = new TextDecoder();
// Reusable 4-byte destination buffer for `encodeInto` calls that encode one
// character at a time (4 bytes is the longest UTF-8 sequence for a code point).
const scratch = new Uint8Array(4);

export function toUTF8(buf: Uint8Array): string {
return decoder.decode(buf);
/**
 * Decode a buffer of UTF-8 encoded bytes into a JS (UTF-16) string.
 *
 * @param buf - The bytes to decode.
 * @returns The decoded string.
 */
export function bytesToString(buf: Uint8Array): string {
    const decoded = utf8Dec.decode(buf);

    return decoded;
}

export function fromUTF8(str: string): Uint8Array {
return encoder.encode(str);
/**
 * Encode a JS (UTF-16) string as UTF-8 bytes.
 *
 * @param str - The string to encode.
 * @returns A byte array holding the UTF-8 encoding of `str`.
 */
export function stringToBytes(str: string): Uint8Array {
    const encoded = utf8Enc.encode(str);

    return encoded;
}

export function strByteLen(str: string): number {
return fromUTF8(str).length;
/**
 * Count how many bytes the string `s` occupies once encoded as UTF-8.
 *
 * The string is walked one code point at a time and each code point is
 * encoded into the shared scratch buffer, so no full-size byte array is
 * allocated for the whole string.
 *
 * @param s - The string to measure.
 * @returns The UTF-8 byte length of `s`.
 */
export function strUTF8Len(s: string): number {
    const chars = s[Symbol.iterator]();
    let total = 0;

    for (let step = chars.next(); !step.done; step = chars.next()) {
        const { written } = utf8Enc.encodeInto(step.value, scratch);

        total += written;
    }

    return total;
}

/**
 * Given a JS string `s` and a UTF-16 code unit index `idx` into it (the kind
 * of index used by `s[idx]`, `s.slice()` and parser locations), compute the
 * byte offset of that position in the UTF-8 encoding of `s`.
 *
 * `idx` must lie on a character boundary: either the index at which a code
 * point starts, or `s.length` (the position one past the last character).
 *
 * @param s - The string being indexed.
 * @param idx - UTF-16 code unit index into `s`.
 * @returns The number of UTF-8 bytes that precede position `idx`.
 * @throws Error if `idx` is negative, greater than `s.length`, not an
 * integer, or points into the middle of a surrogate pair.
 */
export function strUTF16IndexToUTF8Offset(s: string, idx: number): number {
    let units = 0; // UTF-16 code units consumed so far
    let off = 0; // UTF-8 bytes consumed so far

    // `for..of` iterates by code point, so `ch` is either a single code unit
    // (a BMP character or a lone surrogate) or a full surrogate pair.
    for (const ch of s) {
        if (units === idx) {
            return off;
        }

        if (units > idx) {
            // Walked past `idx` without landing on it: it is negative or
            // falls inside a surrogate pair.
            break;
        }

        if (ch.length === 2) {
            // Supplementary-plane character: 2 UTF-16 units, 4 UTF-8 bytes.
            units += 2;
            off += 4;
        } else {
            // A BMP character is always ONE UTF-16 unit, even when it needs
            // 3 UTF-8 bytes (U+0800..U+FFFF). A lone surrogate is encoded as
            // U+FFFD, which is 3 bytes as well.
            const code = ch.charCodeAt(0);

            units += 1;
            off += code < 0x80 ? 1 : code < 0x800 ? 2 : 3;
        }
    }

    // `idx === s.length` addresses the end of the string.
    if (units === idx) {
        return off;
    }

    throw new Error(`No unicode character index ${idx} in string ${s}.`);
}
6 changes: 3 additions & 3 deletions test/unit/ast/ast_node.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ import {
ASTReader,
Block,
compileJson,
fromUTF8,
FunctionDefinition,
Literal,
SourceUnit
SourceUnit,
stringToBytes
} from "../../../src";

describe("ASTNode", () => {
Expand Down Expand Up @@ -102,7 +102,7 @@ describe("ASTNode", () => {
it("extractSourceFragment()", () => {
const increment = nodes[nodes.length - 2];

expect(increment.extractSourceFragment(source)).toEqual(fromUTF8("a++"));
expect(increment.extractSourceFragment(source)).toEqual(stringToBytes("a++"));
});
});
}
Expand Down
8 changes: 4 additions & 4 deletions test/unit/ast/unicode.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ import {
SrcRangeMap,
StructuredDocumentation,
assert,
bytesToString,
compileSol,
compileSourceString,
detectCompileErrors,
fromUTF8,
toUTF8
stringToBytes
} from "../../../src";

const samples: string[] = [
Expand All @@ -30,7 +30,7 @@ async function strToAst(
contents: string,
version: string
): Promise<[SourceUnit, ASTReader]> {
const sources: FileMap = new Map([[name, fromUTF8(contents)]]);
const sources: FileMap = new Map([[name, stringToBytes(contents)]]);
const canonicalResult = await compileSourceString(name, contents, version);

const errors = detectCompileErrors(canonicalResult.data);
Expand Down Expand Up @@ -85,7 +85,7 @@ describe("Unicode tests", () => {

for (const doc of docs) {
const coords = doc.sourceInfo;
const actual = toUTF8(
const actual = bytesToString(
contents.slice(coords.offset, coords.offset + coords.length)
).trim();

Expand Down
4 changes: 2 additions & 2 deletions test/unit/compile/compiler_selection.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import {
CompilerVersions07,
CompilerVersions08,
CompilerVersionSelectionStrategy,
fromUTF8,
LatestAndFirstVersionInEachSeriesStrategy,
LatestCompilerVersion,
LatestVersionInEachSeriesStrategy,
RangeVersionStrategy,
stringToBytes,
VersionDetectionStrategy
} from "../../../src";

Expand Down Expand Up @@ -180,7 +180,7 @@ describe("VersionDetectionStrategy", () => {
it(`Returns ${JSON.stringify(range)} for ${JSON.stringify(source)} and ${
fallback.constructor.name
} in constructor`, () => {
const strategy = new VersionDetectionStrategy([fromUTF8(source)], fallback);
const strategy = new VersionDetectionStrategy([stringToBytes(source)], fallback);

expect(strategy.select()).toEqual(range);
});
Expand Down
6 changes: 3 additions & 3 deletions test/unit/compile/inference/findAllFiles.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import expect from "expect";
import fse from "fs-extra";
import { join } from "path";
import { FileMap, FileSystemResolver, findAllFiles, fromUTF8 } from "../../../../src";
import { FileMap, FileSystemResolver, findAllFiles, stringToBytes } from "../../../../src";

const SAMPLES_DIR = join("test", "samples", "solidity");

Expand Down Expand Up @@ -63,7 +63,7 @@ describe("findAllFiles() throws proper errors", () => {
const files: FileMap = new Map([
[
"foo.sol",
fromUTF8(`import a
stringToBytes(`import a
contract Foo {
}
`)
Expand All @@ -79,7 +79,7 @@ contract Foo {
const files: FileMap = new Map([
[
"foo.sol",
fromUTF8(`import "a.sol";
stringToBytes(`import "a.sol";
contract Foo {
}
`)
Expand Down
9 changes: 6 additions & 3 deletions test/unit/compile/utils.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import {
CompilerKind,
detectCompileErrors,
FileMap,
fromUTF8,
getCompilerForVersion,
LatestAndFirstVersionInEachSeriesStrategy,
LatestCompilerVersion,
NativeCompiler,
parsePathRemapping,
stringToBytes,
WasmCompiler
} from "../../../src";

Expand Down Expand Up @@ -102,9 +102,12 @@ describe("Compile general utils", () => {
const expectedFiles: FileMap = new Map([
[
"./test/sol_files/json_code/B.sol",
fromUTF8("import './A.sol';\n\ncontract B {\n int16 test;\n}\n")
stringToBytes("import './A.sol';\n\ncontract B {\n int16 test;\n}\n")
],
["./test/sol_files/json_code/A.sol", fromUTF8("contract A {\n uint8 test;\n}\n")]
[
"./test/sol_files/json_code/A.sol",
stringToBytes("contract A {\n uint8 test;\n}\n")
]
]);

const cases: Array<[string, string | undefined, RegExp | undefined]> = [
Expand Down
Loading