From 246f4a7e089a3f8ff6dff79a52b627d4fa68c1c5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Mar 2024 11:11:36 -0400 Subject: [PATCH] feat[tool]: add `node_id` map to source map (#3811) this commit adds a new, AST-based map to the source map which links program counters (pcs) directly back to the AST output. this should improve the ability of third parties to implement source code integrations (debuggers, storage map tracers, etc). refactors: - get rid of `vyper.codegen.core.getpos()` - rename `IRnode.source_pos` to `IRnode.ast_source` - refactor a couple places in codegen which were passing `IRnode`s to the `Expr` constructor - rewrote the source map compression routine a bit. it might have gotten broken but at this point the compressed source map does not seem widely used. --- docs/compiling-a-contract.rst | 5 +- .../unit/cli/vyper_json/test_compile_json.py | 26 ++++-- .../cli/vyper_json/test_output_selection.py | 10 +++ tests/unit/compiler/test_source_map.py | 58 +++++++++--- vyper/ast/nodes.py | 2 +- vyper/ast/nodes.pyi | 4 + vyper/builtins/_utils.py | 8 +- vyper/cli/vyper_json.py | 9 +- vyper/codegen/core.py | 9 -- vyper/codegen/expr.py | 43 +++++---- .../function_definitions/external_function.py | 12 +-- vyper/codegen/ir_node.py | 26 +++--- vyper/codegen/stmt.py | 41 +++------ vyper/compiler/output.py | 88 ++++++++++++------- vyper/ir/compile_ir.py | 54 ++++++------ vyper/ir/optimizer.py | 12 +-- vyper/semantics/analysis/getters.py | 3 +- 17 files changed, 239 insertions(+), 171 deletions(-) diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index c4e8bad636..83571203e8 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -275,11 +275,14 @@ The following example describes the expected input format of ``vyper-json``. Com // evm.bytecode.opcodes - Opcodes list // evm.deployedBytecode.object - Deployed bytecode object // evm.deployedBytecode.opcodes - Deployed opcodes list - // evm.deployedBytecode.sourceMap - Deployed source mapping (useful for debugging) + // evm.deployedBytecode.sourceMap - Solidity-style source mapping + // evm.deployedBytecode.sourceMapFull - Deployed source mapping (useful for debugging) // evm.methodIdentifiers - The list of function hashes // // Using `evm`, `evm.bytecode`, etc. will select every target part of that output. // Additionally, `*` can be used as a wildcard to request everything. + // Note that the sourceMapFull.pc_ast_map is the recommended source map to use; + // the other types are included for legacy and compatibility reasons. // "outputSelection": { "*": ["evm.bytecode", "abi"], // Enable the abi and bytecode outputs for every single contract diff --git a/tests/unit/cli/vyper_json/test_compile_json.py b/tests/unit/cli/vyper_json/test_compile_json.py index e5f7384068..4fe2111f43 100644 --- a/tests/unit/cli/vyper_json/test_compile_json.py +++ b/tests/unit/cli/vyper_json/test_compile_json.py @@ -151,7 +151,11 @@ def test_compile_json(input_json, input_bundle): for source_id, contract_name in [(0, "foo"), (2, "library"), (3, "bar")]: path = f"contracts/{contract_name}.vy" data = compile_code_results[path] - assert output_json["sources"][path] == {"id": source_id, "ast": data["ast_dict"]["ast"]} + assert output_json["sources"][path] == { + "id": source_id, + "ast": data["ast_dict"]["ast"], + "annotated_ast": data["annotated_ast_dict"]["ast"], + } assert output_json["contracts"][path][contract_name] == { "abi": data["abi"], "devdoc": data["devdoc"], @@ -260,15 +264,25 @@ def test_exc_handler_to_dict_compiler(input_json): def test_source_ids_increment(input_json): - input_json["settings"]["outputSelection"] = {"*": ["evm.deployedBytecode.sourceMap"]} + input_json["settings"]["outputSelection"] = {"*": ["ast", "evm.deployedBytecode.sourceMapFull"]} result = compile_json(input_json) def get(filename, contractname): - return result["contracts"][filename][contractname]["evm"]["deployedBytecode"]["sourceMap"] + ast = result["sources"][filename]["ast"] + ret = ast["source_id"] + + # grab it via source map to sanity check + contract_info = result["contracts"][filename][contractname]["evm"] + pc_ast_map = contract_info["deployedBytecode"]["sourceMapFull"]["pc_ast_map"] + pc_item = next(iter(pc_ast_map.values())) + source_id, node_id = pc_item + assert ret == source_id + + return ret - assert get("contracts/foo.vy", "foo").startswith("-1:-1:0") - assert get("contracts/library.vy", "library").startswith("-1:-1:2") - assert get("contracts/bar.vy", "bar").startswith("-1:-1:3") + assert get("contracts/foo.vy", "foo") == 0 + assert get("contracts/library.vy", "library") == 2 + assert get("contracts/bar.vy", "bar") == 3 def test_relative_import_paths(input_json): diff --git a/tests/unit/cli/vyper_json/test_output_selection.py b/tests/unit/cli/vyper_json/test_output_selection.py index 5383190a66..f7fbfe673c 100644 --- a/tests/unit/cli/vyper_json/test_output_selection.py +++ b/tests/unit/cli/vyper_json/test_output_selection.py @@ -45,6 +45,16 @@ def test_star(): assert result == {PurePath("foo.vy"): expected, PurePath("bar.vy"): expected} +def test_ast(): + input_json = { + "sources": {"foo.vy": ""}, + "settings": {"outputSelection": {"foo.vy": ["ast", "annotated_ast"]}}, + } + expected = sorted([TRANSLATE_MAP[k] for k in ["ast", "annotated_ast"]]) + result = get_output_formats(input_json) + assert result == {PurePath("foo.vy"): expected} + + def test_evm(): input_json = { "sources": {"foo.vy": ""}, diff --git a/tests/unit/compiler/test_source_map.py b/tests/unit/compiler/test_source_map.py index 5b478dd2aa..04bd141185 100644 --- a/tests/unit/compiler/test_source_map.py +++ b/tests/unit/compiler/test_source_map.py @@ -1,14 +1,18 @@ +from collections import namedtuple + from vyper.compiler import compile_code from vyper.compiler.output import _compress_source_map from vyper.compiler.utils import expand_source_map TEST_CODE = """ +x: public(uint256) + @internal def _baz(a: int128) -> int128: b: int128 = a for i: int128 in range(2, 5): b *= i - if b > 31337: + if b > 31336 + 1: break return b @@ -82,22 +86,19 @@ def update_foo(): def test_compress_source_map(): - code = """ -@external -def foo() -> uint256: - return 42 - """ + # mock the required VyperNode fields in compress_source_map + # fake_node = namedtuple("fake_node", ("lineno", "col_offset", "end_lineno", "end_col_offset")) + fake_node = namedtuple("fake_node", ["src"]) + compressed = _compress_source_map( - code, {"0": None, "2": (2, 0, 4, 13), "3": (2, 0, 2, 8), "5": (2, 0, 2, 8)}, {"3": "o"}, 2 + {2: fake_node("-1:-1:-1"), 3: fake_node("1:45"), 5: fake_node("45:49")}, {3: "o"}, 6 ) - assert compressed == "-1:-1:2:-;1:45;:8::o;" + assert compressed == "-1:-1:-1;-1:-1:-1;-1:-1:-1;1:45:o;-1:-1:-1;45:49" def test_expand_source_map(): - compressed = "-1:-1:0:-;;13:42:1;:21;::0:o;:::-;1::1;" + compressed = "13:42:1;:21;::0:o;:::-;1::1;" expanded = [ - [-1, -1, 0, "-"], - [-1, -1, 0, None], [13, 42, 1, None], [13, 21, 1, None], [13, 21, 0, "o"], @@ -105,3 +106,38 @@ def test_expand_source_map(): [1, 21, 1, None], ] assert expand_source_map(compressed) == expanded + + +def _construct_node_id_map(ast_struct): + if isinstance(ast_struct, dict): + ret = {} + if "node_id" in ast_struct: + ret[ast_struct["node_id"]] = ast_struct + for item in ast_struct.values(): + ret.update(_construct_node_id_map(item)) + return ret + + elif isinstance(ast_struct, list): + ret = {} + for item in ast_struct: + ret.update(_construct_node_id_map(item)) + return ret + + else: + return {} + + +def test_node_id_map(): + code = TEST_CODE + out = compile_code(code, output_formats=["annotated_ast_dict", "source_map", "ir"]) + assert out["source_map"]["pc_ast_map_item_keys"] == ("source_id", "node_id") + + pc_ast_map = out["source_map"]["pc_ast_map"] + + ast_node_map = _construct_node_id_map(out["annotated_ast_dict"]) + + for pc, (source_id, node_id) in pc_ast_map.items(): + assert isinstance(pc, int), pc + assert isinstance(source_id, int), source_id + assert isinstance(node_id, int), node_id + assert node_id in ast_node_map diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 2ca199bd7e..02c7e15686 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -146,7 +146,7 @@ def _to_node(obj, parent): if isinstance(obj, VyperNode): # if object is already a vyper node, make sure the parent is set correctly # and fix any missing source offsets - obj._parent = parent + obj.set_parent(parent) for field_name in NODE_SRC_ATTRIBUTES: if getattr(obj, field_name) is None: setattr(obj, field_name, getattr(parent, field_name, None)) diff --git a/vyper/ast/nodes.pyi b/vyper/ast/nodes.pyi index 4ebb61e76e..f673bb765c 100644 --- a/vyper/ast/nodes.pyi +++ b/vyper/ast/nodes.pyi @@ -17,6 +17,10 @@ def get_node( class VyperNode: full_source_code: str = ... node_source_code: str = ... + lineno: int = ... + col_offset: int = ... + end_lineno: int = ... + end_col_offset: int = ... _metadata: dict = ... _original_node: Optional[VyperNode] = ... def __init__(self, parent: Optional[VyperNode] = ..., **kwargs: Any) -> None: ... diff --git a/vyper/builtins/_utils.py b/vyper/builtins/_utils.py index 3fad225b48..0ee7ecd0b7 100644 --- a/vyper/builtins/_utils.py +++ b/vyper/builtins/_utils.py @@ -7,10 +7,10 @@ from vyper.semantics.types.module import ModuleT -def _strip_source_pos(ir_node): - ir_node.source_pos = None +def _strip_ast_source(ir_node): + ir_node.ast_source = None for x in ir_node.args: - _strip_source_pos(x) + _strip_ast_source(x) def generate_inline_function(code, variables, variables_2, memory_allocator): @@ -38,5 +38,5 @@ def generate_inline_function(code, variables, variables_2, memory_allocator): # NOTE if we ever use this for inlining user-code, it would make # sense to fix the offsets of the source positions in the generated # code instead of stripping them. - _strip_source_pos(generated_ir) + _strip_ast_source(generated_ir) return new_context, generated_ir diff --git a/vyper/cli/vyper_json.py b/vyper/cli/vyper_json.py index 032d7ebe64..21073cabeb 100755 --- a/vyper/cli/vyper_json.py +++ b/vyper/cli/vyper_json.py @@ -17,6 +17,7 @@ TRANSLATE_MAP = { "abi": "abi", "ast": "ast_dict", + "annotated_ast": "annotated_ast_dict", "devdoc": "devdoc", "evm.methodIdentifiers": "method_identifiers", "evm.bytecode.object": "bytecode", @@ -313,8 +314,12 @@ def format_to_output_dict(compiler_data: dict) -> dict: for path, data in compiler_data.items(): path = str(path) # Path breaks json serializability output_dict["sources"][path] = {"id": data["source_id"]} - if "ast_dict" in data: - output_dict["sources"][path]["ast"] = data["ast_dict"]["ast"] + + for k in ("ast_dict", "annotated_ast_dict"): + if k in data: + # un-translate the key + k2 = k.removesuffix("_dict") + output_dict["sources"][path][k2] = data[k]["ast"] name = PurePath(path).stem output_dict["contracts"][path] = {name: {}} diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index ecf05d1a49..2cb2876088 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -432,15 +432,6 @@ def pop_dyn_array(darray_node, return_popped_item): return IRnode.from_list(b1.resolve(b2.resolve(ret)), typ=typ, location=location) -def getpos(node): - return ( - node.lineno, - node.col_offset, - getattr(node, "end_lineno", None), - getattr(node, "end_col_offset", None), - ) - - # add an offset to a pointer, keeping location and encoding info def add_ofst(ptr, ofst): ret = ["add", ptr, ofst] diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index d0c5154cbe..7c39a4f5cf 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -13,7 +13,6 @@ ensure_in_memory, get_dyn_array_count, get_element_ptr, - getpos, is_array_like, is_bytes_m_type, is_flag_type, @@ -72,13 +71,6 @@ class Expr: # TODO: Once other refactors are made reevaluate all inline imports def __init__(self, node, context, is_stmt=False): - if isinstance(node, IRnode): - # this is a kludge for parse_AugAssign to pass in IRnodes - # directly. - # TODO fixme! - self.ir_node = node - return - assert isinstance(node, vy_ast.VyperNode) if node.has_folded_value: node = node.get_folded_value() @@ -94,7 +86,7 @@ def __init__(self, node, context, is_stmt=False): assert isinstance(self.ir_node, IRnode), self.ir_node self.ir_node.annotation = self.expr.get("node_source_code") - self.ir_node.source_pos = getpos(self.expr) + self.ir_node.ast_source = self.expr def parse_Int(self): typ = self.expr._metadata["type"] @@ -382,7 +374,14 @@ def parse_BinOp(self): left = Expr.parse_value_expr(self.expr.left, self.context) right = Expr.parse_value_expr(self.expr.right, self.context) - is_shift_op = isinstance(self.expr.op, (vy_ast.LShift, vy_ast.RShift)) + return Expr.handle_binop(self.expr.op, left, right, self.context) + + @classmethod + def handle_binop(cls, op, left, right, context): + assert not left.is_pointer + assert not right.is_pointer + + is_shift_op = isinstance(op, (vy_ast.LShift, vy_ast.RShift)) if is_shift_op: assert is_numeric_type(left.typ) @@ -391,25 +390,25 @@ def parse_BinOp(self): # Sanity check - ensure that we aren't dealing with different types # This should be unreachable due to the type check pass if left.typ != right.typ: - raise TypeCheckFailure(f"unreachable, {left.typ} != {right.typ}", self.expr) + raise TypeCheckFailure(f"unreachable: {left.typ} != {right.typ}") assert is_numeric_type(left.typ) or is_flag_type(left.typ) out_typ = left.typ - if isinstance(self.expr.op, vy_ast.BitAnd): + if isinstance(op, vy_ast.BitAnd): return IRnode.from_list(["and", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.BitOr): + if isinstance(op, vy_ast.BitOr): return IRnode.from_list(["or", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.BitXor): + if isinstance(op, vy_ast.BitXor): return IRnode.from_list(["xor", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.LShift): + if isinstance(op, vy_ast.LShift): new_typ = left.typ if new_typ.bits != 256: # TODO implement me. ["and", 2**bits - 1, shl(right, left)] raise TypeCheckFailure("unreachable") return IRnode.from_list(shl(right, left), typ=new_typ) - if isinstance(self.expr.op, vy_ast.RShift): + if isinstance(op, vy_ast.RShift): new_typ = left.typ if new_typ.bits != 256: # TODO implement me. promote_signed_int(op(right, left), bits) @@ -421,17 +420,17 @@ def parse_BinOp(self): assert is_numeric_type(left.typ) with left.cache_when_complex("x") as (b1, x), right.cache_when_complex("y") as (b2, y): - if isinstance(self.expr.op, vy_ast.Add): + if isinstance(op, vy_ast.Add): ret = arithmetic.safe_add(x, y) - elif isinstance(self.expr.op, vy_ast.Sub): + elif isinstance(op, vy_ast.Sub): ret = arithmetic.safe_sub(x, y) - elif isinstance(self.expr.op, vy_ast.Mult): + elif isinstance(op, vy_ast.Mult): ret = arithmetic.safe_mul(x, y) - elif isinstance(self.expr.op, (vy_ast.Div, vy_ast.FloorDiv)): + elif isinstance(op, (vy_ast.Div, vy_ast.FloorDiv)): ret = arithmetic.safe_div(x, y) - elif isinstance(self.expr.op, vy_ast.Mod): + elif isinstance(op, vy_ast.Mod): ret = arithmetic.safe_mod(x, y) - elif isinstance(self.expr.op, vy_ast.Pow): + elif isinstance(op, vy_ast.Pow): ret = arithmetic.safe_pow(x, y) else: # pragma: nocover raise CompilerPanic("Unreachable") diff --git a/vyper/codegen/function_definitions/external_function.py b/vyper/codegen/function_definitions/external_function.py index b380eab2ce..6f783bb9c5 100644 --- a/vyper/codegen/function_definitions/external_function.py +++ b/vyper/codegen/function_definitions/external_function.py @@ -1,6 +1,6 @@ from vyper.codegen.abi_encoder import abi_encoding_matches_vyper from vyper.codegen.context import Context, VariableRecord -from vyper.codegen.core import get_element_ptr, getpos, make_setter, needs_clamp +from vyper.codegen.core import get_element_ptr, make_setter, needs_clamp from vyper.codegen.expr import Expr from vyper.codegen.function_definitions.common import ( EntryPointInfo, @@ -39,7 +39,7 @@ def _register_function_args(func_t: ContractFunctionT, context: Context) -> list dst = IRnode(p, typ=arg.typ, location=MEMORY) copy_arg = make_setter(dst, arg_ir) - copy_arg.source_pos = getpos(arg.ast_source) + copy_arg.ast_source = arg.ast_source ret.append(copy_arg) else: assert abi_encoding_matches_vyper(arg.typ) @@ -101,18 +101,18 @@ def handler_for(calldata_kwargs, default_kwargs): rhs = get_element_ptr(calldata_kwargs_ofst, k, array_bounds_check=False) copy_arg = make_setter(lhs, rhs) - copy_arg.source_pos = getpos(arg_meta.ast_source) + copy_arg.ast_source = arg_meta.ast_source ret.append(copy_arg) for x in default_kwargs: dst = context.lookup_var(x.name).pos lhs = IRnode(dst, location=MEMORY, typ=x.typ) - lhs.source_pos = getpos(x.ast_source) + lhs.ast_source = x.ast_source kw_ast_val = func_t.default_values[x.name] # e.g. `3` in x: int = 3 rhs = Expr(kw_ast_val, context).ir_node copy_arg = make_setter(lhs, rhs) - copy_arg.source_pos = getpos(x.ast_source) + copy_arg.ast_source = x.ast_source ret.append(copy_arg) ret.append(["goto", func_t._ir_info.external_function_base_entry_label]) @@ -210,7 +210,7 @@ def generate_ir_for_external_function(code, compilation_target): # the ir which comprises the main body of the function, # besides any kwarg handling - func_common_ir = IRnode.from_list(["seq", body, exit_], source_pos=getpos(code)) + func_common_ir = IRnode.from_list(["seq", body, exit_], ast_source=code) tag_frame_info(func_t, context) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 1df2932da1..14e396ff74 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -3,8 +3,9 @@ import re from enum import Enum, auto from functools import cached_property -from typing import Any, List, Optional, Tuple, Union +from typing import Any, List, Optional, Union +import vyper.ast as vy_ast from vyper.compiler.settings import VYPER_COLOR_OUTPUT from vyper.evm.address_space import AddrSpace from vyper.evm.opcodes import get_ir_opcodes @@ -144,7 +145,7 @@ def __init__( args: List["IRnode"] = None, typ: VyperType = None, location: Optional[AddrSpace] = None, - source_pos: Optional[Tuple[int, int]] = None, + ast_source: Optional[vy_ast.VyperNode] = None, annotation: Optional[str] = None, error_msg: Optional[str] = None, mutable: bool = True, @@ -162,7 +163,7 @@ def __init__( assert isinstance(typ, VyperType) or typ is None, repr(typ) self.typ = typ self.location = location - self.source_pos = source_pos + self.ast_source = ast_source self.error_msg = error_msg self.annotation = annotation self.mutable = mutable @@ -478,11 +479,8 @@ def __eq__(self, other): and self.args == other.args and self.typ == other.typ and self.location == other.location - and self.source_pos == other.source_pos - and self.annotation == other.annotation and self.mutable == other.mutable and self.add_gas_estimate == other.add_gas_estimate - and self.valency == other.valency ) @property @@ -516,13 +514,13 @@ def repr(self) -> str: if self.repr_show_gas and self.gas: o += OKBLUE + "{" + ENDC + str(self.gas) + OKBLUE + "} " + ENDC # add gas for info. o += "[" + self._colorise_keywords(self.repr_value) - prev_lineno = self.source_pos[0] if self.source_pos else None + prev_lineno = self.ast_source.lineno if self.ast_source else None arg_lineno = None annotated = False has_inner_newlines = False for arg in self.args: o += ",\n " - arg_lineno = arg.source_pos[0] if arg.source_pos else None + arg_lineno = arg.ast_source.lineno if arg.ast_source else None if arg_lineno is not None and arg_lineno != prev_lineno and self.value in ("seq", "if"): o += f"# Line {(arg_lineno)}\n " prev_lineno = arg_lineno @@ -553,7 +551,7 @@ def from_list( obj: Any, typ: VyperType = None, location: Optional[AddrSpace] = None, - source_pos: Optional[Tuple[int, int]] = None, + ast_source: Optional[vy_ast.VyperNode] = None, annotation: Optional[str] = None, error_msg: Optional[str] = None, mutable: bool = True, @@ -570,8 +568,8 @@ def from_list( # the input gets modified. CC 20191121. if typ is not None: obj.typ = typ - if obj.source_pos is None: - obj.source_pos = source_pos + if obj.ast_source is None: + obj.ast_source = ast_source if obj.location is None: obj.location = location if obj.encoding is None: @@ -589,7 +587,7 @@ def from_list( annotation=annotation, mutable=mutable, add_gas_estimate=add_gas_estimate, - source_pos=source_pos, + ast_source=ast_source, encoding=encoding, error_msg=error_msg, is_self_call=is_self_call, @@ -598,12 +596,12 @@ def from_list( else: return cls( obj[0], - [cls.from_list(o, source_pos=source_pos) for o in obj[1:]], + [cls.from_list(o, ast_source=ast_source) for o in obj[1:]], typ, location=location, annotation=annotation, mutable=mutable, - source_pos=source_pos, + ast_source=ast_source, add_gas_estimate=add_gas_estimate, encoding=encoding, error_msg=error_msg, diff --git a/vyper/codegen/stmt.py b/vyper/codegen/stmt.py index f658dc92b9..1da31d3bda 100644 --- a/vyper/codegen/stmt.py +++ b/vyper/codegen/stmt.py @@ -9,7 +9,6 @@ clamp_le, get_dyn_array_count, get_element_ptr, - getpos, make_byte_array_copier, make_setter, zero_pad, @@ -42,7 +41,7 @@ def __init__(self, node: vy_ast.VyperNode, context: Context) -> None: assert isinstance(self.ir_node, IRnode), self.ir_node self.ir_node.annotation = self.stmt.get("node_source_code") - self.ir_node.source_pos = getpos(self.stmt) + self.ir_node.ast_source = self.stmt def parse_Expr(self): return Expr(self.stmt.value, self.context, is_stmt=True).ir_node @@ -197,20 +196,19 @@ def _parse_For_range(self): assert "type" in self.stmt.target.target._metadata target_type = self.stmt.target.target._metadata["type"] - # Get arg0 range_call: vy_ast.Call = self.stmt.iter assert isinstance(range_call, vy_ast.Call) - args_len = len(range_call.args) - if args_len == 1: - arg0, arg1 = (IRnode.from_list(0, typ=target_type), range_call.args[0]) - elif args_len == 2: - arg0, arg1 = range_call.args - else: # pragma: nocover - raise TypeCheckFailure("unreachable: bad # of arguments to range()") with self.context.range_scope(): - start = Expr.parse_value_expr(arg0, self.context) - end = Expr.parse_value_expr(arg1, self.context) + args = [Expr.parse_value_expr(arg, self.context) for arg in range_call.args] + if len(args) == 1: + start = IRnode.from_list(0, typ=target_type) + end = args[0] + elif len(args) == 2: + start, end = args + else: # pragma: nocover + raise TypeCheckFailure("unreachable") + kwargs = { s.arg: Expr.parse_value_expr(s.value, self.context) for s in range_call.keywords } @@ -300,8 +298,8 @@ def _parse_For_list(self): def parse_AugAssign(self): target = self._get_target(self.stmt.target) + right = Expr.parse_value_expr(self.stmt.value, self.context) - sub = Expr.parse_value_expr(self.stmt.value, self.context) if not target.typ._is_prim_word: # because of this check, we do not need to check for # make_setter references lhs<->rhs as in parse_Assign - @@ -309,20 +307,9 @@ def parse_AugAssign(self): raise TypeCheckFailure("unreachable") with target.cache_when_complex("_loc") as (b, target): - rhs = Expr.parse_value_expr( - vy_ast.BinOp( - left=IRnode.from_list(LOAD(target), typ=target.typ), - right=sub, - op=self.stmt.op, - lineno=self.stmt.lineno, - col_offset=self.stmt.col_offset, - end_lineno=self.stmt.end_lineno, - end_col_offset=self.stmt.end_col_offset, - node_source_code=self.stmt.get("node_source_code"), - ), - self.context, - ) - return b.resolve(STORE(target, rhs)) + left = IRnode.from_list(LOAD(target), typ=target.typ) + new_val = Expr.handle_binop(self.stmt.op, left, right, self.context) + return b.resolve(STORE(target, new_val)) def parse_Continue(self): return IRnode.from_list("continue") diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 707c99291b..de8e34370d 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -1,9 +1,7 @@ import warnings -from collections import OrderedDict, deque +from collections import deque from pathlib import PurePath -import asttokens - from vyper.ast import ast_to_dict, parse_natspec from vyper.codegen.ir_node import IRnode from vyper.compiler.phases import CompilerData @@ -237,46 +235,72 @@ def _build_asm(asm_list): return output_string -def build_source_map_output(compiler_data: CompilerData) -> OrderedDict: - _, line_number_map = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_compiler_metadata=False - ) - # Sort line_number_map - out = OrderedDict() - for k in sorted(line_number_map.keys()): - out[k] = line_number_map[k] +def _build_node_identifier(ast_node): + assert ast_node.module_node is not None, type(ast_node) + return (ast_node.module_node.source_id, ast_node.node_id) - out["pc_pos_map_compressed"] = _compress_source_map( - compiler_data.source_code, out["pc_pos_map"], out["pc_jump_map"], compiler_data.source_id + +def build_source_map_output(compiler_data: CompilerData) -> dict: + """ + Generate source map output in various formats. Note that integrations + are encouraged to use pc_ast_map since the information it provides is + a superset of the other formats, and the other types are included + for legacy reasons. + """ + bytecode, pc_maps = compile_ir.assembly_to_evm( + compiler_data.assembly_runtime, insert_compiler_metadata=False ) - out["pc_pos_map"] = dict((k, v) for k, v in out["pc_pos_map"].items() if v) + # sort the pc maps alphabetically + # CMC 2024-03-09 is this really necessary? + out = {} + for k in sorted(pc_maps.keys()): + out[k] = pc_maps[k] + + ast_map = out.pop("pc_raw_ast_map") + + assert isinstance(ast_map, dict) # lint + if 0 not in ast_map: + # tag it with source id + ast_map[0] = compiler_data.annotated_vyper_module + + pc_pos_map = {k: compile_ir.getpos(v) for (k, v) in ast_map.items()} + node_id_map = {k: _build_node_identifier(v) for (k, v) in ast_map.items()} + compressed_map = _compress_source_map(ast_map, out["pc_jump_map"], len(bytecode)) + out["pc_pos_map_compressed"] = compressed_map + out["pc_pos_map"] = pc_pos_map + out["pc_ast_map"] = node_id_map + # hint to consumers what the fields in pc_ast_map mean + out["pc_ast_map_item_keys"] = ("source_id", "node_id") return out -def _compress_source_map(code, pos_map, jump_map, source_id): - linenos = asttokens.LineNumbers(code) - ret = [f"-1:-1:{source_id}:-"] - last_pos = [-1, -1, source_id] +# generate a solidity-style source map. this functionality is deprecated +# in favor of pc_ast_map, and may not be maintained to the same level +# as pc_ast_map. +def _compress_source_map(ast_map, jump_map, bytecode_size): + ret = [] - for pc in sorted(pos_map)[1:]: - current_pos = [-1, -1, source_id] - if pos_map[pc]: - current_pos[0] = linenos.line_to_offset(*pos_map[pc][:2]) - current_pos[1] = linenos.line_to_offset(*pos_map[pc][2:]) - current_pos[0] + jump_map = jump_map.copy() + ast_map = ast_map.copy() - if pc in jump_map: - current_pos.append(jump_map[pc]) + for pc in range(bytecode_size): + if pc in ast_map: + ast_node = ast_map.pop(pc) + # ast_node.src conveniently has the current position in + # the correct, compressed format + current_pos = [ast_node.src] + else: + current_pos = ["-1:-1:-1"] - for i in range(2, -1, -1): - if current_pos[i] != last_pos[i]: - last_pos[i] = current_pos[i] - elif len(current_pos) == i + 1: - current_pos.pop() - else: - current_pos[i] = "" + if pc in jump_map: + jump_type = jump_map.pop(pc) + current_pos.append(jump_type) ret.append(":".join(str(i) for i in current_pos)) + assert len(ast_map) == 0, ast_map + assert len(jump_map) == 0, jump_map + return ";".join(ret) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index ac8631ff7b..e4a4cc60f7 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -54,8 +54,8 @@ def mksymbol(name=""): return f"_sym_{name}{_next_symbol}" -def mkdebug(pc_debugger, source_pos): - i = Instruction("DEBUG", source_pos) +def mkdebug(pc_debugger, ast_source): + i = Instruction("DEBUG", ast_source) i.pc_debugger = pc_debugger return [i] @@ -133,7 +133,7 @@ def _rewrite_return_sequences(ir_node, label_params=None): # works for both internal and external exit_to more_args = ["pass" if t.value == "return_pc" else t for t in args[1:]] _t.append(["goto", dest] + more_args) - ir_node.args = IRnode.from_list(_t, source_pos=ir_node.source_pos).args + ir_node.args = IRnode.from_list(_t, ast_source=ir_node.ast_source).args if ir_node.value == "label": label_params = set(t.value for t in ir_node.args[1].args) @@ -187,14 +187,11 @@ class Instruction(str): def __new__(cls, sstr, *args, **kwargs): return super().__new__(cls, sstr) - def __init__(self, sstr, source_pos=None, error_msg=None): + def __init__(self, sstr, ast_source=None, error_msg=None): self.error_msg = error_msg self.pc_debugger = False - if source_pos is not None: - self.lineno, self.col_offset, self.end_lineno, self.end_col_offset = source_pos - else: - self.lineno, self.col_offset, self.end_lineno, self.end_col_offset = [None] * 4 + self.ast_source = ast_source def apply_line_numbers(func): @@ -204,7 +201,7 @@ def apply_line_no_wrapper(*args, **kwargs): ret = func(*args, **kwargs) new_ret = [ - Instruction(i, code.source_pos, code.error_msg) + Instruction(i, code.ast_source, code.error_msg) if isinstance(i, str) and not isinstance(i, Instruction) else i for i in ret @@ -765,37 +762,38 @@ def _height_of(witharg): # inject debug opcode. elif code.value == "debugger": - return mkdebug(pc_debugger=False, source_pos=code.source_pos) + return mkdebug(pc_debugger=False, ast_source=code.ast_source) # inject debug opcode. elif code.value == "pc_debugger": - return mkdebug(pc_debugger=True, source_pos=code.source_pos) + return mkdebug(pc_debugger=True, ast_source=code.ast_source) else: # pragma: no cover raise ValueError(f"Weird code element: {type(code)} {code}") -def note_line_num(line_number_map, item, pos): - # Record line number attached to pos. - if isinstance(item, Instruction): - if item.lineno is not None: - offsets = (item.lineno, item.col_offset, item.end_lineno, item.end_col_offset) - else: - offsets = None +def getpos(node): + return (node.lineno, node.col_offset, node.end_lineno, node.end_col_offset) - line_number_map["pc_pos_map"][pos] = offsets + +def note_line_num(line_number_map, pc, item): + # Record AST attached to pc + if isinstance(item, Instruction): + if (ast_node := item.ast_source) is not None: + ast_node = ast_node.get_original_node() + if hasattr(ast_node, "node_id"): + line_number_map["pc_raw_ast_map"][pc] = ast_node if item.error_msg is not None: - line_number_map["error_map"][pos] = item.error_msg + line_number_map["error_map"][pc] = item.error_msg - added_line_breakpoint = note_breakpoint(line_number_map, item, pos) - return added_line_breakpoint + note_breakpoint(line_number_map, pc, item) -def note_breakpoint(line_number_map, item, pos): - # Record line number attached to pos. +def note_breakpoint(line_number_map, pc, item): + # Record line number attached to pc if item == "DEBUG": # Is PC debugger, create PC breakpoint. if item.pc_debugger: - line_number_map["pc_breakpoints"].add(pos) + line_number_map["pc_breakpoints"].add(pc) # Create line number breakpoint. else: line_number_map["breakpoints"].add(item.lineno + 1) @@ -1064,7 +1062,7 @@ def adjust_pc_maps(pc_maps, ofst): ret["breakpoints"] = pc_maps["breakpoints"].copy() ret["pc_breakpoints"] = {pc + ofst for pc in pc_maps["pc_breakpoints"]} ret["pc_jump_map"] = {k + ofst: v for (k, v) in pc_maps["pc_jump_map"].items()} - ret["pc_pos_map"] = {k + ofst: v for (k, v) in pc_maps["pc_pos_map"].items()} + ret["pc_raw_ast_map"] = {k + ofst: v for (k, v) in pc_maps["pc_raw_ast_map"].items()} ret["error_map"] = {k + ofst: v for (k, v) in pc_maps["error_map"].items()} return ret @@ -1171,7 +1169,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat "breakpoints": set(), "pc_breakpoints": set(), "pc_jump_map": {0: "-"}, - "pc_pos_map": {}, + "pc_raw_ast_map": {}, "error_map": {}, } @@ -1213,7 +1211,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat # go through the code, resolving symbolic locations # (i.e. JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): - note_line_num(line_number_map, item, pc) + note_line_num(line_number_map, pc, item) if item == "DEBUG": continue # skip debug diff --git a/vyper/ir/optimizer.py b/vyper/ir/optimizer.py index 75e9b46783..7ff5390e4b 100644 --- a/vyper/ir/optimizer.py +++ b/vyper/ir/optimizer.py @@ -436,7 +436,7 @@ def _optimize(node: IRnode, parent: Optional[IRnode]) -> Tuple[bool, IRnode]: value = node.value typ = node.typ location = node.location - source_pos = node.source_pos + ast_source = node.ast_source error_msg = node.error_msg annotation = node.annotation add_gas_estimate = node.add_gas_estimate @@ -460,7 +460,7 @@ def finalize(val, args): ir_builder, typ=typ, location=location, - source_pos=source_pos, + ast_source=ast_source, error_msg=error_msg, annotation=annotation, add_gas_estimate=add_gas_estimate, @@ -552,7 +552,7 @@ def finalize(val, args): if _evm_int(argz[0]) == 0: raise StaticAssertionException( f"assertion found to fail at compile time. (hint: did you mean `raise`?) {node}", - source_pos, + ast_source, ) else: changed = True @@ -615,7 +615,7 @@ def _merge_memzero(argz): changed = True new_ir = IRnode.from_list( ["calldatacopy", initial_offset, "calldatasize", total_length], - source_pos=mstore_nodes[0].source_pos, + ast_source=mstore_nodes[0].ast_source, ) # replace first zero'ing operation with optimized node and remove the rest argz[idx] = new_ir @@ -658,7 +658,7 @@ def _rewrite_mstore_dload(argz): dst = arg.args[0] src = arg.args[1].args[0] len_ = 32 - argz[i] = IRnode.from_list(["dloadbytes", dst, src, len_], source_pos=arg.source_pos) + argz[i] = IRnode.from_list(["dloadbytes", dst, src, len_], ast_source=arg.ast_source) changed = True return changed @@ -716,7 +716,7 @@ def _merge_load(argz, _LOAD, _COPY, allow_overlap=True): changed = True new_ir = IRnode.from_list( [_COPY, initial_dst_offset, initial_src_offset, total_length], - source_pos=mstore_nodes[0].source_pos, + ast_source=mstore_nodes[0].ast_source, ) # replace first copy operation with optimized node and remove the rest argz[idx] = new_ir diff --git a/vyper/semantics/analysis/getters.py b/vyper/semantics/analysis/getters.py index bce64987da..ad5c8227cb 100644 --- a/vyper/semantics/analysis/getters.py +++ b/vyper/semantics/analysis/getters.py @@ -63,8 +63,7 @@ def generate_public_variable_getters(vyper_module: vy_ast.Module) -> None: # after iterating the input types, the remaining annotation node is our return type return_annotation = copy.copy(annotation) - # join everything together as a new `FunctionDef` node, annotate it - # with the type, and append it to the existing `Module` node + # join everything together as a new `FunctionDef` node expanded = vy_ast.FunctionDef( name=funcname, args=vy_ast.arguments(args=input_nodes, defaults=[]),