Switch sources to typed arrays instead of strings #242

Merged (8 commits) on Jan 11, 2024
2 changes: 2 additions & 0 deletions .eslintrc.json
@@ -30,6 +30,8 @@
"ignorePatterns": [
"src/compile/inference/file_level_definitions_parser_header.ts",
"src/compile/inference/file_level_definitions_parser.ts",
"src/ast/comments/comments_parser_header.ts",
"src/ast/comments/comments_parser.ts",
"test/utils/typeStrings/typeString_parser_header.ts",
"test/utils/typeStrings/typeString_parser.ts"
]
1 change: 1 addition & 0 deletions .gitignore
@@ -6,5 +6,6 @@ docs
coverage
*.tgz
src/compile/inference/file_level_definitions_parser.ts
src/ast/comments/comments_parser.ts
test/utils/typeStrings/typeString_parser.ts
.idea
3 changes: 2 additions & 1 deletion .nycrc.json
@@ -7,7 +7,8 @@
"**/coverage/**",
"**/docs/**",
"**/.compiler_cache/**",
"src/compile/inference/file_level_definitions_parser*.ts"
"src/compile/inference/file_level_definitions_parser*.ts",
"src/ast/comments/comments_parser*.ts"
],
"reporter": ["lcov", "text-summary"],
"all": true,
3 changes: 2 additions & 1 deletion package.json
@@ -16,7 +16,8 @@
"transpile": "tsc",
"build-type-parser": "tspegjs -o test/utils/typeStrings/typeString_parser.ts --custom-header-file test/utils/typeStrings/typeString_parser_header.ts --cache test/utils/typeStrings/typeString_grammar.pegjs",
"build-file-level-definitions-parser": "tspegjs -o src/compile/inference/file_level_definitions_parser.ts --custom-header-file src/compile/inference/file_level_definitions_parser_header.ts --cache src/compile/inference/file_level_definitions.pegjs",
"build": "npm run clean && npm run build-file-level-definitions-parser && npm run transpile && chmod u+x dist/bin/compile.js",
"build-comments-parser": "tspegjs -o src/ast/comments/comments_parser.ts --custom-header-file src/ast/comments/comments_parser_header.ts --cache src/ast/comments/comments_grammar.pegjs",
"build": "npm run clean && npm run build-comments-parser && npm run build-file-level-definitions-parser && npm run transpile && chmod u+x dist/bin/compile.js",
"lint": "eslint src/ test/ --ext=ts",
"lint:fix": "eslint src/ test/ --ext=ts --fix",
"test": "npm run build-type-parser && NODE_OPTIONS='--max-old-space-size=2048' nyc mocha",
6 changes: 3 additions & 3 deletions src/ast/ast_node.ts
@@ -1,6 +1,6 @@
import { ASTNodeFormatter } from "./ast_node_formatter";
import { ASTContext } from "./ast_reader";
import { parseSourceLocation, SourceLocation } from "./utils";
import { SourceLocation, parseSourceLocation } from "./utils";

export type ASTNodeCallback = (node: ASTNode) => void;
export type ASTNodeSelector = (node: ASTNode) => boolean;
@@ -328,10 +328,10 @@ export class ASTNode {
*
* In other words, returns corresponding code fragment substring.
*/
extractSourceFragment(source: string): string {
extractSourceFragment(source: Uint8Array): Uint8Array {
const { offset, length } = this.sourceInfo;

return source.substr(offset, length);
return source.slice(offset, offset + length);
}

private createWalker(callback: ASTNodeCallback): ASTNodeCallback {
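(Not part of the diff.) A minimal sketch of what the extractSourceFragment change buys: Solidity src entries count bytes, so slicing the raw UTF-8 buffer stays correct when the source contains multi-byte characters, whereas String.prototype.substr counts UTF-16 code units. The offsets below are illustrative, not taken from a real compilation.

import { strict as assert } from "assert";

const encoder = new TextEncoder();
const decoder = new TextDecoder();

// "§" is a 2-byte character in UTF-8, so byte and character offsets diverge here.
const source = encoder.encode("// § comment\ncontract C {}\n");

// A hypothetical src entry "14:13:0": byte offset 14, byte length 13.
const offset = 14;
const length = 13;

// Same slicing as the new extractSourceFragment(source: Uint8Array).
const fragment = source.slice(offset, offset + length);
assert.equal(decoder.decode(fragment), "contract C {}");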
12 changes: 8 additions & 4 deletions src/ast/ast_reader.ts
@@ -5,6 +5,10 @@ import { ModernConfiguration } from "./modern";
import { DefaultNodePostprocessorList } from "./postprocessing";
import { sequence } from "./utils";

// We store source files as byte arrays since AST src maps are byte-offset
// based.
export type FileMap = Map<string, Uint8Array>;

export interface ASTNodeProcessor<T extends ASTNode> {
process(
reader: ASTReader,
@@ -14,7 +18,7 @@ export interface ASTNodeProcessor<T extends ASTNode> {
}

export interface ASTNodePostprocessor<T extends ASTNode> {
process(node: T, context: ASTContext, sources?: Map<string, string>): void;
process(node: T, context: ASTContext, sources?: FileMap): void;
isSupportedNode(node: ASTNode): node is T;
}

@@ -133,15 +137,15 @@ export class ASTPostprocessor {
);
}

processNode(node: ASTNode, context: ASTContext, sources?: Map<string, string>): void {
processNode(node: ASTNode, context: ASTContext, sources?: FileMap): void {
const postprocessors = this.getPostprocessorsForNode(node);

for (const postprocessor of postprocessors) {
postprocessor.process(node, context, sources);
}
}

processContext(context: ASTContext, sources?: Map<string, string>): void {
processContext(context: ASTContext, sources?: FileMap): void {
for (const postprocessor of this.nodePostprocessors) {
for (const node of context.nodes) {
if (postprocessor.isSupportedNode(node)) {
@@ -185,7 +189,7 @@ export class ASTReader {
*
* @returns An array of `SourceUnit`s for each of the source entries in the input.
*/
read(data: any, kind = ASTKind.Any, sources?: Map<string, string>): SourceUnit[] {
read(data: any, kind = ASTKind.Any, sources?: FileMap): SourceUnit[] {
const entries: Array<[string, any]> = Object.entries(data.sources);
const rootNodeTypeName = "SourceUnit";
const result: SourceUnit[] = [];
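(Not part of the diff.) A hedged usage sketch for the new FileMap type: callers that previously passed Map<string, string> to ASTReader.read would now encode each source to bytes first. The file name, source text, and the assumption that FileMap, ASTReader and ASTKind are all re-exported from the solc-typed-ast package root are illustrative.

import { ASTKind, ASTReader, FileMap } from "solc-typed-ast";

// Hypothetical standard-JSON compiler output; in real code this would come
// from the compiler (e.g. via compileSol/compileJson).
declare const compilerOutput: any;

const encoder = new TextEncoder();

// Sources are keyed the same way as in the compiler input, but stored as UTF-8 bytes.
const sources: FileMap = new Map([
    ["contracts/Example.sol", encoder.encode("contract Example {}\n")]
]);

const reader = new ASTReader();
const units = reader.read(compilerOutput, ASTKind.Any, sources);

console.log(units.map((unit) => unit.absolutePath));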
35 changes: 35 additions & 0 deletions src/ast/comments/comment.ts
@@ -0,0 +1,35 @@
import { RawCommentKind } from "../constants";

export interface CommentLoc {
start: number;
end: number;
}

export class RawComment {
/**
* Type of comment
*/
kind: RawCommentKind;

/**
* The entire text of the comment, including *s and /s
*/
text: string;

/**
* The text of the comment without * and /, i.e. only the actual comment body
*/
internalText: string;

/**
* The location of this comment
*/
loc: CommentLoc;

constructor(kind: RawCommentKind, text: string, internalText: string, loc: CommentLoc) {
this.kind = kind;
this.text = text;
this.internalText = internalText;
this.loc = loc;
}
}
168 changes: 168 additions & 0 deletions src/ast/comments/comments_grammar.pegjs
@@ -0,0 +1,168 @@
{
expected;
error;
peg$anyExpectation;
peg$parse__;
}

CommentSoup =
t: (
([^"'/]+ (!("//" / "///" / "/*") "/")?) { return text(); } // non-comment, non-string-literal anything
/ StringLiteral { return text(); } // string literal
/ (c: Comment __ { return c; }) // comment
)* { return t; }

Comment
= BlockComment
/ NatspecLineGroup
/ LineComment


FirstBlockLine = "/*" body: ((!"*/" NonLineTerminator)* { return text(); }) LineTerminator { return body; }
BlockLine = (PrimitiveWhiteSpace* (!"*/" "*"))? body: ((!"*/" NonLineTerminator)* { return text(); }) LineTerminator { return body; }
LastBlockLine = (PrimitiveWhiteSpace* (!"*/" "*"))? body: ((!"*/" NonLineTerminator)* { return text(); }) "*/" { return body; }

MultiLineBlockComment = start: FirstBlockLine inner: BlockLine* last: LastBlockLine {
const isNatSpec = start[0] === "*";

// For NatSpec comments we strip 1 space from each inner line (if present)
// to be compatible with the Solidity compiler's behavior
if (isNatSpec) {
inner = inner.map((l: string) => l.startsWith(" ") ? l.slice(1) : l);
last = last.startsWith(" ") ? last.slice(1) : last;
}

let body = [start, ...inner, last].join("\n")

// for natspec skip the second *
body = isNatSpec ? body.slice(1) : body;

const kind = isNatSpec ? RawCommentKind.BlockNatSpec : RawCommentKind.BlockComment;

return new RawComment(kind, text(), body, mkLoc(location()) )
}

SingleLineBlockComment = "/*" body: ((!"*/" NonLineTerminator)* { return text(); }) "*/" {
const isNatSpec = body[0] === "*";
return new RawComment(
isNatSpec ? RawCommentKind.BlockNatSpec : RawCommentKind.BlockComment,
text(),
isNatSpec ? body.slice(1) : body, // for natspec skip the second *
mkLoc(location())
);
}

BlockComment = MultiLineBlockComment / SingleLineBlockComment

NonLineTerminator =
[^\n\r\u2028\u2029]

LineComment =
"//" body: (NonLineTerminator* { return text(); }) LineTerminator {
return new RawComment(RawCommentKind.SingleLineComment, text(), body, mkLoc(location()));
}

LineNatspec =
PrimitiveWhiteSpace* "///" body: (NonLineTerminator* { return text(); }) LineTerminator {
return body.startsWith(" ") ? body.slice(1) : body;
}

NatspecLineGroup =
bodies: LineNatspec+ {
return new RawComment(RawCommentKind.LineGroupNatSpec, text(), bodies.join("\n"), mkLoc(location()));
}

// ==== White space

PrimitiveWhiteSpace =
"\t"
/ "\v"
/ "\f"
/ " "
/ "\u00A0"
/ "\uFEFF"
/ Zs

// Separator, Space
Zs =
[\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]

LineTerminator =
[\n\r\u2028\u2029]

__ =
(PrimitiveWhiteSpace / LineTerminator)*

StringLiteral =
"'" chars: SingleStringChar* "'" { return chars.join(""); }
/ '"' chars: DoubleStringChar* '"' { return chars.join(""); }

AnyChar =
.

DoubleStringChar =
!('"' / "\\" / LineTerminator) AnyChar { return text(); }
/ "\\" sequence: EscapeSequence { return sequence; }
/ LineContinuation

SingleStringChar =
!("'" / "\\" / LineTerminator) AnyChar { return text(); }
/ "\\" sequence: EscapeSequence { return sequence; }
/ LineContinuation

LineContinuation =
"\\" LineTerminatorSequence { return ""; }

EscapeSequence =
CharEscapeSequence
/ "0" !DecDigit { return "\0"; }
/ HexEscapeSequence
/ UnicodeEscapeSequence
/ AnyChar // Allow invalid hex sequences as a fallback

CharEscapeSequence =
SingleEscapeChar
/ NonEscapeChar

SingleEscapeChar =
"'"
/ '"'
/ "\\"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "v" { return "\v"; }

NonEscapeChar =
!(EscapeChar / LineTerminator) AnyChar { return text(); }

HexDigit =
[0-9a-f]i

DecDigit =
[0-9]

EscapeChar =
SingleEscapeChar
/ DecDigit
/ "x"
/ "u"

HexEscapeSequence =
"x" digits:$(HexDigit HexDigit) {
return String.fromCharCode(parseInt(digits, 16));
}

UnicodeEscapeSequence =
"u" digits:$(HexDigit HexDigit HexDigit HexDigit) {
return String.fromCharCode(parseInt(digits, 16));
}

LineTerminatorSequence =
"\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
11 changes: 11 additions & 0 deletions src/ast/comments/comments_parser_header.ts
@@ -0,0 +1,11 @@
import { CommentLoc, RawComment } from "./comment";
import { RawCommentKind } from "../constants";

function mkLoc(raw: any): CommentLoc {
return { start: raw.start.offset, end: raw.end.offset };
}

export function parseComments(contents: string): (RawComment | string)[] {
// @ts-ignore
return parse(contents);
}
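(Not part of the diff.) A hedged example of what parseComments is expected to yield, inferred from the grammar above rather than from the PR's tests: plain code chunks come back as strings, comments come back as RawComment instances, and NatSpec bodies have the comment markers and one leading space per line stripped.

import { RawComment, parseComments } from "./comments"; // assuming the caller sits under src/ast

const source = [
    "/**",
    " * @notice Adds two numbers",
    " */",
    "function add(uint a, uint b) public pure returns (uint) {",
    "    return a + b; // overflow-checked since 0.8.0",
    "}",
    ""
].join("\n");

// The parser produces a "soup" of plain code fragments (strings) and RawComment nodes.
const soup = parseComments(source);
const comments = soup.filter((item): item is RawComment => item instanceof RawComment);

for (const comment of comments) {
    // Expected: a block NatSpec comment whose internalText contains "@notice Adds two numbers"
    // (markers stripped), followed by a single-line comment for the trailing "//" comment.
    console.log(comment.kind, JSON.stringify(comment.internalText), comment.loc);
}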
2 changes: 2 additions & 0 deletions src/ast/comments/index.ts
@@ -0,0 +1,2 @@
export * from "./comment";
export { parseComments } from "./comments_parser";
7 changes: 7 additions & 0 deletions src/ast/constants.ts
@@ -89,6 +89,13 @@ export enum TimeUnit {
Years = "years"
}

export enum RawCommentKind {
SingleLineComment = "single_line",
BlockComment = "block_comment",
LineGroupNatSpec = "line_group_natspec",
BlockNatSpec = "block_natspec"
}

export const PossibleDataLocations = new Set<string>(Object.values(DataLocation));

export const PossibleFunctionVisibilities = new Set<string>(Object.values(FunctionVisibility));
1 change: 1 addition & 0 deletions src/ast/index.ts
@@ -15,3 +15,4 @@ export * from "./dispatch";
export * from "./definitions";
export * from "./utils";
export * from "./xpath";
export * from "./comments";