Switch sources to typed arrays instead of strings (#242)
* Store source files as Uint8Arrays instead of strings internally
* Fix ASTNode.extractSourceFragment() to use typed array instead of string
cd1m0 authored Jan 11, 2024
1 parent 9ebcad8 commit c11d592
Showing 41 changed files with 986 additions and 217 deletions.
2 changes: 2 additions & 0 deletions .eslintrc.json
@@ -30,6 +30,8 @@
"ignorePatterns": [
"src/compile/inference/file_level_definitions_parser_header.ts",
"src/compile/inference/file_level_definitions_parser.ts",
"src/ast/comments/comments_parser_header.ts",
"src/ast/comments/comments_parser.ts",
"test/utils/typeStrings/typeString_parser_header.ts",
"test/utils/typeStrings/typeString_parser.ts"
]
1 change: 1 addition & 0 deletions .gitignore
@@ -6,5 +6,6 @@ docs
coverage
*.tgz
src/compile/inference/file_level_definitions_parser.ts
src/ast/comments/comments_parser.ts
test/utils/typeStrings/typeString_parser.ts
.idea
3 changes: 2 additions & 1 deletion .nycrc.json
@@ -7,7 +7,8 @@
"**/coverage/**",
"**/docs/**",
"**/.compiler_cache/**",
"src/compile/inference/file_level_definitions_parser*.ts"
"src/compile/inference/file_level_definitions_parser*.ts",
"src/ast/comments/comments_parser*.ts"
],
"reporter": ["lcov", "text-summary"],
"all": true,
3 changes: 2 additions & 1 deletion package.json
@@ -16,7 +16,8 @@
"transpile": "tsc",
"build-type-parser": "tspegjs -o test/utils/typeStrings/typeString_parser.ts --custom-header-file test/utils/typeStrings/typeString_parser_header.ts --cache test/utils/typeStrings/typeString_grammar.pegjs",
"build-file-level-definitions-parser": "tspegjs -o src/compile/inference/file_level_definitions_parser.ts --custom-header-file src/compile/inference/file_level_definitions_parser_header.ts --cache src/compile/inference/file_level_definitions.pegjs",
"build": "npm run clean && npm run build-file-level-definitions-parser && npm run transpile && chmod u+x dist/bin/compile.js",
"build-comments-parser": "tspegjs -o src/ast/comments/comments_parser.ts --custom-header-file src/ast/comments/comments_parser_header.ts --cache src/ast/comments/comments_grammar.pegjs",
"build": "npm run clean && npm run build-comments-parser && npm run build-file-level-definitions-parser && npm run transpile && chmod u+x dist/bin/compile.js",
"lint": "eslint src/ test/ --ext=ts",
"lint:fix": "eslint src/ test/ --ext=ts --fix",
"test": "npm run build-type-parser && NODE_OPTIONS='--max-old-space-size=2048' nyc mocha",
6 changes: 3 additions & 3 deletions src/ast/ast_node.ts
@@ -1,6 +1,6 @@
import { ASTNodeFormatter } from "./ast_node_formatter";
import { ASTContext } from "./ast_reader";
import { parseSourceLocation, SourceLocation } from "./utils";
import { SourceLocation, parseSourceLocation } from "./utils";

export type ASTNodeCallback = (node: ASTNode) => void;
export type ASTNodeSelector = (node: ASTNode) => boolean;
@@ -328,10 +328,10 @@ export class ASTNode {
*
* In other words, returns corresponding code fragment substring.
*/
extractSourceFragment(source: string): string {
extractSourceFragment(source: Uint8Array): Uint8Array {
const { offset, length } = this.sourceInfo;

return source.substr(offset, length);
return source.slice(offset, offset + length);
}

private createWalker(callback: ASTNodeCallback): ASTNodeCallback {
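With this change, extractSourceFragment() returns a slice of the file's bytes rather than a substring. A minimal consumer-side sketch of recovering the fragment text, assuming the standard TextDecoder API and that ASTNode is importable from the published package (name assumed):

import { ASTNode } from "solc-typed-ast"; // package name assumed

// Hypothetical helper, not part of this commit: decode a node's source
// fragment back into a string now that extractSourceFragment() returns bytes.
export function fragmentToString(node: ASTNode, sourceBytes: Uint8Array): string {
    // Byte slice covering the node's src range
    const fragment = node.extractSourceFragment(sourceBytes);

    // Source offsets are byte-based, so decode the slice as UTF-8
    return new TextDecoder("utf-8").decode(fragment);
}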
12 changes: 8 additions & 4 deletions src/ast/ast_reader.ts
@@ -5,6 +5,10 @@ import { ModernConfiguration } from "./modern";
import { DefaultNodePostprocessorList } from "./postprocessing";
import { sequence } from "./utils";

// We store source files as byte arrays since AST src maps are byte-offset
// based.
export type FileMap = Map<string, Uint8Array>;

export interface ASTNodeProcessor<T extends ASTNode> {
process(
reader: ASTReader,
@@ -14,7 +18,7 @@ export interface ASTNodeProcessor<T extends ASTNode> {
}

export interface ASTNodePostprocessor<T extends ASTNode> {
process(node: T, context: ASTContext, sources?: Map<string, string>): void;
process(node: T, context: ASTContext, sources?: FileMap): void;
isSupportedNode(node: ASTNode): node is T;
}

@@ -133,15 +137,15 @@ export class ASTPostprocessor {
);
}

processNode(node: ASTNode, context: ASTContext, sources?: Map<string, string>): void {
processNode(node: ASTNode, context: ASTContext, sources?: FileMap): void {
const postprocessors = this.getPostprocessorsForNode(node);

for (const postprocessor of postprocessors) {
postprocessor.process(node, context, sources);
}
}

processContext(context: ASTContext, sources?: Map<string, string>): void {
processContext(context: ASTContext, sources?: FileMap): void {
for (const postprocessor of this.nodePostprocessors) {
for (const node of context.nodes) {
if (postprocessor.isSupportedNode(node)) {
@@ -185,7 +189,7 @@ export class ASTReader {
*
* @returns An array of `SourceUnit`s for each of the source entries in the input.
*/
read(data: any, kind = ASTKind.Any, sources?: Map<string, string>): SourceUnit[] {
read(data: any, kind = ASTKind.Any, sources?: FileMap): SourceUnit[] {
const entries: Array<[string, any]> = Object.entries(data.sources);
const rootNodeTypeName = "SourceUnit";
const result: SourceUnit[] = [];
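The new FileMap alias makes the byte-based representation explicit across the reader and postprocessor APIs. A minimal sketch of how a consumer might build one, assuming the standard TextEncoder API, that ASTReader, ASTKind and FileMap are exported from the package root (name assumed), and a placeholder compilerOutput variable for solc's standard-JSON output:

import { ASTKind, ASTReader, FileMap } from "solc-typed-ast"; // package name assumed

declare const compilerOutput: any; // placeholder: solc standard-JSON output obtained elsewhere

// Encode each source as UTF-8 so offsets line up with the AST's byte-based src maps
const sources: FileMap = new Map([
    ["contract.sol", new TextEncoder().encode("contract Foo { uint x; }")]
]);

const reader = new ASTReader();
const units = reader.read(compilerOutput, ASTKind.Any, sources);

console.log(units.length);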
35 changes: 35 additions & 0 deletions src/ast/comments/comment.ts
@@ -0,0 +1,35 @@
import { RawCommentKind } from "../constants";

export interface CommentLoc {
start: number;
end: number;
}

export class RawComment {
/**
* Type of comment
*/
kind: RawCommentKind;

/**
* The entire text of the comment, including the *s and /s
*/
text: string;

/**
* The text of the comment without the *s and /s, i.e. only the actual comment body
*/
internalText: string;

/**
* The location of this comment
*/
loc: CommentLoc;

constructor(kind: RawCommentKind, text: string, internalText: string, loc: CommentLoc) {
this.kind = kind;
this.text = text;
this.internalText = internalText;
this.loc = loc;
}
}
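RawComment is a plain data holder. The values below are hand-written and purely illustrative (not necessarily what the parser emits for this exact input); they only show how the fields relate to one another. Import paths are the in-repo ones introduced by this change:

import { RawComment } from "./comment";
import { RawCommentKind } from "../constants";

// Illustrative values for a line comment such as "// counter"
const comment = new RawComment(
    RawCommentKind.SingleLineComment,
    "// counter",           // text: the whole comment, markers included
    " counter",             // internalText: the body without the leading "//"
    { start: 15, end: 25 }  // loc: hypothetical byte offsets into the file
);

console.log(comment.kind, comment.internalText);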
168 changes: 168 additions & 0 deletions src/ast/comments/comments_grammar.pegjs
@@ -0,0 +1,168 @@
{
expected;
error;
peg$anyExpectation;
peg$parse__;
}

CommentSoup =
t: (
([^"'/]+ (!("//" / "///" / "/*") "/")?) { return text(); } // non-comment, non-string-literal anything
/ StringLiteral { return text(); } // string literal
/ (c: Comment __ { return c; }) // comment
)* { return t; }

Comment
= BlockComment
/ NatspecLineGroup
/ LineComment


FirstBlockLine = "/*" body: ((!"*/" NonLineTerminator)* { return text(); }) LineTerminator { return body; }
BlockLine = (PrimitiveWhiteSpace* (!"*/" "*"))? body: ((!"*/" NonLineTerminator)* { return text(); }) LineTerminator { return body; }
LastBlockLine = (PrimitiveWhiteSpace* (!"*/" "*"))? body: ((!"*/" NonLineTerminator)* { return text(); }) "*/" { return body; }

MultiLineBlockComment = start: FirstBlockLine inner: BlockLine* last: LastBlockLine {
const isNatSpec = start[0] === "*";

// For NatSpec comments we strip 1 space from each inner line (if present)
// to be compatible with the Solidity compiler's behavior
if (isNatSpec) {
inner = inner.map((l: string) => l.startsWith(" ") ? l.slice(1) : l);
last = last.startsWith(" ") ? last.slice(1) : last;
}

let body = [start, ...inner, last].join("\n")

// for natspec skip the second *
body = isNatSpec ? body.slice(1) : body;

const kind = isNatSpec ? RawCommentKind.BlockNatSpec : RawCommentKind.BlockComment;

return new RawComment(kind, text(), body, mkLoc(location()) )
}

SingleLineBlockComment = "/*" body: ((!"*/" NonLineTerminator)* { return text(); }) "*/" {
const isNatSpec = body[0] === "*";
return new RawComment(
isNatSpec ? RawCommentKind.BlockNatSpec : RawCommentKind.BlockComment,
text(),
isNatSpec ? body.slice(1) : body, // for natspec skip the second *
mkLoc(location())
);
}

BlockComment = MultiLineBlockComment / SingleLineBlockComment

NonLineTerminator =
[^\n\r\u2028\u2029]

LineComment =
"//" body: (NonLineTerminator* { return text(); }) LineTerminator {
return new RawComment(RawCommentKind.SingleLineComment, text(), body, mkLoc(location()));
}

LineNatspec =
PrimitiveWhiteSpace* "///" body: (NonLineTerminator* { return text(); }) LineTerminator {
return body.startsWith(" ") ? body.slice(1) : body;
}

NatspecLineGroup =
bodies: LineNatspec+ {
return new RawComment(RawCommentKind.LineGroupNatSpec, text(), bodies.join("\n"), mkLoc(location()));
}

// ==== White space

PrimitiveWhiteSpace =
"\t"
/ "\v"
/ "\f"
/ " "
/ "\u00A0"
/ "\uFEFF"
/ Zs

// Separator, Space
Zs =
[\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]

LineTerminator =
[\n\r\u2028\u2029]

__ =
(PrimitiveWhiteSpace / LineTerminator)*

StringLiteral =
"'" chars: SingleStringChar* "'" { return chars.join(""); }
/ '"' chars: DoubleStringChar* '"' { return chars.join(""); }

AnyChar =
.

DoubleStringChar =
!('"' / "\\" / LineTerminator) AnyChar { return text(); }
/ "\\" sequence: EscapeSequence { return sequence; }
/ LineContinuation

SingleStringChar =
!("'" / "\\" / LineTerminator) AnyChar { return text(); }
/ "\\" sequence: EscapeSequence { return sequence; }
/ LineContinuation

LineContinuation =
"\\" LineTerminatorSequence { return ""; }

EscapeSequence =
CharEscapeSequence
/ "0" !DecDigit { return "\0"; }
/ HexEscapeSequence
/ UnicodeEscapeSequence
/ AnyChar // Allow invalid hex sequences as a fallback

CharEscapeSequence =
SingleEscapeChar
/ NonEscapeChar

SingleEscapeChar =
"'"
/ '"'
/ "\\"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "v" { return "\v"; }

NonEscapeChar =
!(EscapeChar / LineTerminator) AnyChar { return text(); }

HexDigit =
[0-9a-f]i

DecDigit =
[0-9]

EscapeChar =
SingleEscapeChar
/ DecDigit
/ "x"
/ "u"

HexEscapeSequence =
"x" digits:$(HexDigit HexDigit) {
return String.fromCharCode(parseInt(digits, 16));
}

UnicodeEscapeSequence =
"u" digits:$(HexDigit HexDigit HexDigit HexDigit) {
return String.fromCharCode(parseInt(digits, 16));
}

LineTerminatorSequence =
"\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
11 changes: 11 additions & 0 deletions src/ast/comments/comments_parser_header.ts
@@ -0,0 +1,11 @@
import { CommentLoc, RawComment } from "./comment";
import { RawCommentKind } from "../constants";

function mkLoc(raw: any): CommentLoc {
return { start: raw.start.offset, end: raw.end.offset };
}

export function parseComments(contents: string): (RawComment | string)[] {
// @ts-ignore
return parse(contents);
}
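A minimal usage sketch for parseComments(), based only on the signature above: the result interleaves RawComment objects (for comments) with plain strings (for the non-comment source between them). Imports use the in-repo paths; a consumer would import from the package instead:

import { parseComments } from "./comments_parser";
import { RawComment } from "./comment";

const pieces = parseComments("contract Foo {\n    uint x; // counter\n}\n");

// Keep only the comments and inspect their kind, location and body
for (const piece of pieces) {
    if (piece instanceof RawComment) {
        console.log(piece.kind, piece.loc, piece.internalText);
    }
}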
2 changes: 2 additions & 0 deletions src/ast/comments/index.ts
@@ -0,0 +1,2 @@
export * from "./comment";
export { parseComments } from "./comments_parser";
7 changes: 7 additions & 0 deletions src/ast/constants.ts
@@ -89,6 +89,13 @@ export enum TimeUnit {
Years = "years"
}

export enum RawCommentKind {
SingleLineComment = "single_line",
BlockComment = "block_comment",
LineGroupNatSpec = "line_group_natspec",
BlockNatSpec = "block_natspec"
}

export const PossibleDataLocations = new Set<string>(Object.values(DataLocation));

export const PossibleFunctionVisibilities = new Set<string>(Object.values(FunctionVisibility));
1 change: 1 addition & 0 deletions src/ast/index.ts
@@ -15,3 +15,4 @@ export * from "./dispatch";
export * from "./definitions";
export * from "./utils";
export * from "./xpath";
export * from "./comments";