diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index c4e8bad636..83571203e8 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -275,11 +275,14 @@ The following example describes the expected input format of ``vyper-json``. Com // evm.bytecode.opcodes - Opcodes list // evm.deployedBytecode.object - Deployed bytecode object // evm.deployedBytecode.opcodes - Deployed opcodes list - // evm.deployedBytecode.sourceMap - Deployed source mapping (useful for debugging) + // evm.deployedBytecode.sourceMap - Solidity-style source mapping + // evm.deployedBytecode.sourceMapFull - Deployed source mapping (useful for debugging) // evm.methodIdentifiers - The list of function hashes // // Using `evm`, `evm.bytecode`, etc. will select every target part of that output. // Additionally, `*` can be used as a wildcard to request everything. + // Note that the sourceMapFull.pc_ast_map is the recommended source map to use; + // the other types are included for legacy and compatibility reasons. // "outputSelection": { "*": ["evm.bytecode", "abi"], // Enable the abi and bytecode outputs for every single contract diff --git a/tests/unit/cli/vyper_json/test_compile_json.py b/tests/unit/cli/vyper_json/test_compile_json.py index e5f7384068..4fe2111f43 100644 --- a/tests/unit/cli/vyper_json/test_compile_json.py +++ b/tests/unit/cli/vyper_json/test_compile_json.py @@ -151,7 +151,11 @@ def test_compile_json(input_json, input_bundle): for source_id, contract_name in [(0, "foo"), (2, "library"), (3, "bar")]: path = f"contracts/{contract_name}.vy" data = compile_code_results[path] - assert output_json["sources"][path] == {"id": source_id, "ast": data["ast_dict"]["ast"]} + assert output_json["sources"][path] == { + "id": source_id, + "ast": data["ast_dict"]["ast"], + "annotated_ast": data["annotated_ast_dict"]["ast"], + } assert output_json["contracts"][path][contract_name] == { "abi": data["abi"], "devdoc": data["devdoc"], @@ -260,15 +264,25 @@ def test_exc_handler_to_dict_compiler(input_json): def test_source_ids_increment(input_json): - input_json["settings"]["outputSelection"] = {"*": ["evm.deployedBytecode.sourceMap"]} + input_json["settings"]["outputSelection"] = {"*": ["ast", "evm.deployedBytecode.sourceMapFull"]} result = compile_json(input_json) def get(filename, contractname): - return result["contracts"][filename][contractname]["evm"]["deployedBytecode"]["sourceMap"] + ast = result["sources"][filename]["ast"] + ret = ast["source_id"] + + # grab it via source map to sanity check + contract_info = result["contracts"][filename][contractname]["evm"] + pc_ast_map = contract_info["deployedBytecode"]["sourceMapFull"]["pc_ast_map"] + pc_item = next(iter(pc_ast_map.values())) + source_id, node_id = pc_item + assert ret == source_id + + return ret - assert get("contracts/foo.vy", "foo").startswith("-1:-1:0") - assert get("contracts/library.vy", "library").startswith("-1:-1:2") - assert get("contracts/bar.vy", "bar").startswith("-1:-1:3") + assert get("contracts/foo.vy", "foo") == 0 + assert get("contracts/library.vy", "library") == 2 + assert get("contracts/bar.vy", "bar") == 3 def test_relative_import_paths(input_json): diff --git a/tests/unit/cli/vyper_json/test_output_selection.py b/tests/unit/cli/vyper_json/test_output_selection.py index 5383190a66..f7fbfe673c 100644 --- a/tests/unit/cli/vyper_json/test_output_selection.py +++ b/tests/unit/cli/vyper_json/test_output_selection.py @@ -45,6 +45,16 @@ def test_star(): assert result == {PurePath("foo.vy"): expected, PurePath("bar.vy"): expected} +def test_ast(): + input_json = { + "sources": {"foo.vy": ""}, + "settings": {"outputSelection": {"foo.vy": ["ast", "annotated_ast"]}}, + } + expected = sorted([TRANSLATE_MAP[k] for k in ["ast", "annotated_ast"]]) + result = get_output_formats(input_json) + assert result == {PurePath("foo.vy"): expected} + + def test_evm(): input_json = { "sources": {"foo.vy": ""}, diff --git a/tests/unit/compiler/test_source_map.py b/tests/unit/compiler/test_source_map.py index 5b478dd2aa..04bd141185 100644 --- a/tests/unit/compiler/test_source_map.py +++ b/tests/unit/compiler/test_source_map.py @@ -1,14 +1,18 @@ +from collections import namedtuple + from vyper.compiler import compile_code from vyper.compiler.output import _compress_source_map from vyper.compiler.utils import expand_source_map TEST_CODE = """ +x: public(uint256) + @internal def _baz(a: int128) -> int128: b: int128 = a for i: int128 in range(2, 5): b *= i - if b > 31337: + if b > 31336 + 1: break return b @@ -82,22 +86,19 @@ def update_foo(): def test_compress_source_map(): - code = """ -@external -def foo() -> uint256: - return 42 - """ + # mock the required VyperNode fields in compress_source_map + # fake_node = namedtuple("fake_node", ("lineno", "col_offset", "end_lineno", "end_col_offset")) + fake_node = namedtuple("fake_node", ["src"]) + compressed = _compress_source_map( - code, {"0": None, "2": (2, 0, 4, 13), "3": (2, 0, 2, 8), "5": (2, 0, 2, 8)}, {"3": "o"}, 2 + {2: fake_node("-1:-1:-1"), 3: fake_node("1:45"), 5: fake_node("45:49")}, {3: "o"}, 6 ) - assert compressed == "-1:-1:2:-;1:45;:8::o;" + assert compressed == "-1:-1:-1;-1:-1:-1;-1:-1:-1;1:45:o;-1:-1:-1;45:49" def test_expand_source_map(): - compressed = "-1:-1:0:-;;13:42:1;:21;::0:o;:::-;1::1;" + compressed = "13:42:1;:21;::0:o;:::-;1::1;" expanded = [ - [-1, -1, 0, "-"], - [-1, -1, 0, None], [13, 42, 1, None], [13, 21, 1, None], [13, 21, 0, "o"], @@ -105,3 +106,38 @@ def test_expand_source_map(): [1, 21, 1, None], ] assert expand_source_map(compressed) == expanded + + +def _construct_node_id_map(ast_struct): + if isinstance(ast_struct, dict): + ret = {} + if "node_id" in ast_struct: + ret[ast_struct["node_id"]] = ast_struct + for item in ast_struct.values(): + ret.update(_construct_node_id_map(item)) + return ret + + elif isinstance(ast_struct, list): + ret = {} + for item in ast_struct: + ret.update(_construct_node_id_map(item)) + return ret + + else: + return {} + + +def test_node_id_map(): + code = TEST_CODE + out = compile_code(code, output_formats=["annotated_ast_dict", "source_map", "ir"]) + assert out["source_map"]["pc_ast_map_item_keys"] == ("source_id", "node_id") + + pc_ast_map = out["source_map"]["pc_ast_map"] + + ast_node_map = _construct_node_id_map(out["annotated_ast_dict"]) + + for pc, (source_id, node_id) in pc_ast_map.items(): + assert isinstance(pc, int), pc + assert isinstance(source_id, int), source_id + assert isinstance(node_id, int), node_id + assert node_id in ast_node_map diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 2ca199bd7e..02c7e15686 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -146,7 +146,7 @@ def _to_node(obj, parent): if isinstance(obj, VyperNode): # if object is already a vyper node, make sure the parent is set correctly # and fix any missing source offsets - obj._parent = parent + obj.set_parent(parent) for field_name in NODE_SRC_ATTRIBUTES: if getattr(obj, field_name) is None: setattr(obj, field_name, getattr(parent, field_name, None)) diff --git a/vyper/ast/nodes.pyi b/vyper/ast/nodes.pyi index 4ebb61e76e..f673bb765c 100644 --- a/vyper/ast/nodes.pyi +++ b/vyper/ast/nodes.pyi @@ -17,6 +17,10 @@ def get_node( class VyperNode: full_source_code: str = ... node_source_code: str = ... + lineno: int = ... + col_offset: int = ... + end_lineno: int = ... + end_col_offset: int = ... _metadata: dict = ... _original_node: Optional[VyperNode] = ... def __init__(self, parent: Optional[VyperNode] = ..., **kwargs: Any) -> None: ... diff --git a/vyper/builtins/_utils.py b/vyper/builtins/_utils.py index 3fad225b48..0ee7ecd0b7 100644 --- a/vyper/builtins/_utils.py +++ b/vyper/builtins/_utils.py @@ -7,10 +7,10 @@ from vyper.semantics.types.module import ModuleT -def _strip_source_pos(ir_node): - ir_node.source_pos = None +def _strip_ast_source(ir_node): + ir_node.ast_source = None for x in ir_node.args: - _strip_source_pos(x) + _strip_ast_source(x) def generate_inline_function(code, variables, variables_2, memory_allocator): @@ -38,5 +38,5 @@ def generate_inline_function(code, variables, variables_2, memory_allocator): # NOTE if we ever use this for inlining user-code, it would make # sense to fix the offsets of the source positions in the generated # code instead of stripping them. - _strip_source_pos(generated_ir) + _strip_ast_source(generated_ir) return new_context, generated_ir diff --git a/vyper/cli/vyper_json.py b/vyper/cli/vyper_json.py index 032d7ebe64..21073cabeb 100755 --- a/vyper/cli/vyper_json.py +++ b/vyper/cli/vyper_json.py @@ -17,6 +17,7 @@ TRANSLATE_MAP = { "abi": "abi", "ast": "ast_dict", + "annotated_ast": "annotated_ast_dict", "devdoc": "devdoc", "evm.methodIdentifiers": "method_identifiers", "evm.bytecode.object": "bytecode", @@ -313,8 +314,12 @@ def format_to_output_dict(compiler_data: dict) -> dict: for path, data in compiler_data.items(): path = str(path) # Path breaks json serializability output_dict["sources"][path] = {"id": data["source_id"]} - if "ast_dict" in data: - output_dict["sources"][path]["ast"] = data["ast_dict"]["ast"] + + for k in ("ast_dict", "annotated_ast_dict"): + if k in data: + # un-translate the key + k2 = k.removesuffix("_dict") + output_dict["sources"][path][k2] = data[k]["ast"] name = PurePath(path).stem output_dict["contracts"][path] = {name: {}} diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index ecf05d1a49..2cb2876088 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -432,15 +432,6 @@ def pop_dyn_array(darray_node, return_popped_item): return IRnode.from_list(b1.resolve(b2.resolve(ret)), typ=typ, location=location) -def getpos(node): - return ( - node.lineno, - node.col_offset, - getattr(node, "end_lineno", None), - getattr(node, "end_col_offset", None), - ) - - # add an offset to a pointer, keeping location and encoding info def add_ofst(ptr, ofst): ret = ["add", ptr, ofst] diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index d0c5154cbe..7c39a4f5cf 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -13,7 +13,6 @@ ensure_in_memory, get_dyn_array_count, get_element_ptr, - getpos, is_array_like, is_bytes_m_type, is_flag_type, @@ -72,13 +71,6 @@ class Expr: # TODO: Once other refactors are made reevaluate all inline imports def __init__(self, node, context, is_stmt=False): - if isinstance(node, IRnode): - # this is a kludge for parse_AugAssign to pass in IRnodes - # directly. - # TODO fixme! - self.ir_node = node - return - assert isinstance(node, vy_ast.VyperNode) if node.has_folded_value: node = node.get_folded_value() @@ -94,7 +86,7 @@ def __init__(self, node, context, is_stmt=False): assert isinstance(self.ir_node, IRnode), self.ir_node self.ir_node.annotation = self.expr.get("node_source_code") - self.ir_node.source_pos = getpos(self.expr) + self.ir_node.ast_source = self.expr def parse_Int(self): typ = self.expr._metadata["type"] @@ -382,7 +374,14 @@ def parse_BinOp(self): left = Expr.parse_value_expr(self.expr.left, self.context) right = Expr.parse_value_expr(self.expr.right, self.context) - is_shift_op = isinstance(self.expr.op, (vy_ast.LShift, vy_ast.RShift)) + return Expr.handle_binop(self.expr.op, left, right, self.context) + + @classmethod + def handle_binop(cls, op, left, right, context): + assert not left.is_pointer + assert not right.is_pointer + + is_shift_op = isinstance(op, (vy_ast.LShift, vy_ast.RShift)) if is_shift_op: assert is_numeric_type(left.typ) @@ -391,25 +390,25 @@ def parse_BinOp(self): # Sanity check - ensure that we aren't dealing with different types # This should be unreachable due to the type check pass if left.typ != right.typ: - raise TypeCheckFailure(f"unreachable, {left.typ} != {right.typ}", self.expr) + raise TypeCheckFailure(f"unreachable: {left.typ} != {right.typ}") assert is_numeric_type(left.typ) or is_flag_type(left.typ) out_typ = left.typ - if isinstance(self.expr.op, vy_ast.BitAnd): + if isinstance(op, vy_ast.BitAnd): return IRnode.from_list(["and", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.BitOr): + if isinstance(op, vy_ast.BitOr): return IRnode.from_list(["or", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.BitXor): + if isinstance(op, vy_ast.BitXor): return IRnode.from_list(["xor", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.LShift): + if isinstance(op, vy_ast.LShift): new_typ = left.typ if new_typ.bits != 256: # TODO implement me. ["and", 2**bits - 1, shl(right, left)] raise TypeCheckFailure("unreachable") return IRnode.from_list(shl(right, left), typ=new_typ) - if isinstance(self.expr.op, vy_ast.RShift): + if isinstance(op, vy_ast.RShift): new_typ = left.typ if new_typ.bits != 256: # TODO implement me. promote_signed_int(op(right, left), bits) @@ -421,17 +420,17 @@ def parse_BinOp(self): assert is_numeric_type(left.typ) with left.cache_when_complex("x") as (b1, x), right.cache_when_complex("y") as (b2, y): - if isinstance(self.expr.op, vy_ast.Add): + if isinstance(op, vy_ast.Add): ret = arithmetic.safe_add(x, y) - elif isinstance(self.expr.op, vy_ast.Sub): + elif isinstance(op, vy_ast.Sub): ret = arithmetic.safe_sub(x, y) - elif isinstance(self.expr.op, vy_ast.Mult): + elif isinstance(op, vy_ast.Mult): ret = arithmetic.safe_mul(x, y) - elif isinstance(self.expr.op, (vy_ast.Div, vy_ast.FloorDiv)): + elif isinstance(op, (vy_ast.Div, vy_ast.FloorDiv)): ret = arithmetic.safe_div(x, y) - elif isinstance(self.expr.op, vy_ast.Mod): + elif isinstance(op, vy_ast.Mod): ret = arithmetic.safe_mod(x, y) - elif isinstance(self.expr.op, vy_ast.Pow): + elif isinstance(op, vy_ast.Pow): ret = arithmetic.safe_pow(x, y) else: # pragma: nocover raise CompilerPanic("Unreachable") diff --git a/vyper/codegen/function_definitions/external_function.py b/vyper/codegen/function_definitions/external_function.py index b380eab2ce..6f783bb9c5 100644 --- a/vyper/codegen/function_definitions/external_function.py +++ b/vyper/codegen/function_definitions/external_function.py @@ -1,6 +1,6 @@ from vyper.codegen.abi_encoder import abi_encoding_matches_vyper from vyper.codegen.context import Context, VariableRecord -from vyper.codegen.core import get_element_ptr, getpos, make_setter, needs_clamp +from vyper.codegen.core import get_element_ptr, make_setter, needs_clamp from vyper.codegen.expr import Expr from vyper.codegen.function_definitions.common import ( EntryPointInfo, @@ -39,7 +39,7 @@ def _register_function_args(func_t: ContractFunctionT, context: Context) -> list dst = IRnode(p, typ=arg.typ, location=MEMORY) copy_arg = make_setter(dst, arg_ir) - copy_arg.source_pos = getpos(arg.ast_source) + copy_arg.ast_source = arg.ast_source ret.append(copy_arg) else: assert abi_encoding_matches_vyper(arg.typ) @@ -101,18 +101,18 @@ def handler_for(calldata_kwargs, default_kwargs): rhs = get_element_ptr(calldata_kwargs_ofst, k, array_bounds_check=False) copy_arg = make_setter(lhs, rhs) - copy_arg.source_pos = getpos(arg_meta.ast_source) + copy_arg.ast_source = arg_meta.ast_source ret.append(copy_arg) for x in default_kwargs: dst = context.lookup_var(x.name).pos lhs = IRnode(dst, location=MEMORY, typ=x.typ) - lhs.source_pos = getpos(x.ast_source) + lhs.ast_source = x.ast_source kw_ast_val = func_t.default_values[x.name] # e.g. `3` in x: int = 3 rhs = Expr(kw_ast_val, context).ir_node copy_arg = make_setter(lhs, rhs) - copy_arg.source_pos = getpos(x.ast_source) + copy_arg.ast_source = x.ast_source ret.append(copy_arg) ret.append(["goto", func_t._ir_info.external_function_base_entry_label]) @@ -210,7 +210,7 @@ def generate_ir_for_external_function(code, compilation_target): # the ir which comprises the main body of the function, # besides any kwarg handling - func_common_ir = IRnode.from_list(["seq", body, exit_], source_pos=getpos(code)) + func_common_ir = IRnode.from_list(["seq", body, exit_], ast_source=code) tag_frame_info(func_t, context) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 1df2932da1..14e396ff74 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -3,8 +3,9 @@ import re from enum import Enum, auto from functools import cached_property -from typing import Any, List, Optional, Tuple, Union +from typing import Any, List, Optional, Union +import vyper.ast as vy_ast from vyper.compiler.settings import VYPER_COLOR_OUTPUT from vyper.evm.address_space import AddrSpace from vyper.evm.opcodes import get_ir_opcodes @@ -144,7 +145,7 @@ def __init__( args: List["IRnode"] = None, typ: VyperType = None, location: Optional[AddrSpace] = None, - source_pos: Optional[Tuple[int, int]] = None, + ast_source: Optional[vy_ast.VyperNode] = None, annotation: Optional[str] = None, error_msg: Optional[str] = None, mutable: bool = True, @@ -162,7 +163,7 @@ def __init__( assert isinstance(typ, VyperType) or typ is None, repr(typ) self.typ = typ self.location = location - self.source_pos = source_pos + self.ast_source = ast_source self.error_msg = error_msg self.annotation = annotation self.mutable = mutable @@ -478,11 +479,8 @@ def __eq__(self, other): and self.args == other.args and self.typ == other.typ and self.location == other.location - and self.source_pos == other.source_pos - and self.annotation == other.annotation and self.mutable == other.mutable and self.add_gas_estimate == other.add_gas_estimate - and self.valency == other.valency ) @property @@ -516,13 +514,13 @@ def repr(self) -> str: if self.repr_show_gas and self.gas: o += OKBLUE + "{" + ENDC + str(self.gas) + OKBLUE + "} " + ENDC # add gas for info. o += "[" + self._colorise_keywords(self.repr_value) - prev_lineno = self.source_pos[0] if self.source_pos else None + prev_lineno = self.ast_source.lineno if self.ast_source else None arg_lineno = None annotated = False has_inner_newlines = False for arg in self.args: o += ",\n " - arg_lineno = arg.source_pos[0] if arg.source_pos else None + arg_lineno = arg.ast_source.lineno if arg.ast_source else None if arg_lineno is not None and arg_lineno != prev_lineno and self.value in ("seq", "if"): o += f"# Line {(arg_lineno)}\n " prev_lineno = arg_lineno @@ -553,7 +551,7 @@ def from_list( obj: Any, typ: VyperType = None, location: Optional[AddrSpace] = None, - source_pos: Optional[Tuple[int, int]] = None, + ast_source: Optional[vy_ast.VyperNode] = None, annotation: Optional[str] = None, error_msg: Optional[str] = None, mutable: bool = True, @@ -570,8 +568,8 @@ def from_list( # the input gets modified. CC 20191121. if typ is not None: obj.typ = typ - if obj.source_pos is None: - obj.source_pos = source_pos + if obj.ast_source is None: + obj.ast_source = ast_source if obj.location is None: obj.location = location if obj.encoding is None: @@ -589,7 +587,7 @@ def from_list( annotation=annotation, mutable=mutable, add_gas_estimate=add_gas_estimate, - source_pos=source_pos, + ast_source=ast_source, encoding=encoding, error_msg=error_msg, is_self_call=is_self_call, @@ -598,12 +596,12 @@ def from_list( else: return cls( obj[0], - [cls.from_list(o, source_pos=source_pos) for o in obj[1:]], + [cls.from_list(o, ast_source=ast_source) for o in obj[1:]], typ, location=location, annotation=annotation, mutable=mutable, - source_pos=source_pos, + ast_source=ast_source, add_gas_estimate=add_gas_estimate, encoding=encoding, error_msg=error_msg, diff --git a/vyper/codegen/stmt.py b/vyper/codegen/stmt.py index f658dc92b9..1da31d3bda 100644 --- a/vyper/codegen/stmt.py +++ b/vyper/codegen/stmt.py @@ -9,7 +9,6 @@ clamp_le, get_dyn_array_count, get_element_ptr, - getpos, make_byte_array_copier, make_setter, zero_pad, @@ -42,7 +41,7 @@ def __init__(self, node: vy_ast.VyperNode, context: Context) -> None: assert isinstance(self.ir_node, IRnode), self.ir_node self.ir_node.annotation = self.stmt.get("node_source_code") - self.ir_node.source_pos = getpos(self.stmt) + self.ir_node.ast_source = self.stmt def parse_Expr(self): return Expr(self.stmt.value, self.context, is_stmt=True).ir_node @@ -197,20 +196,19 @@ def _parse_For_range(self): assert "type" in self.stmt.target.target._metadata target_type = self.stmt.target.target._metadata["type"] - # Get arg0 range_call: vy_ast.Call = self.stmt.iter assert isinstance(range_call, vy_ast.Call) - args_len = len(range_call.args) - if args_len == 1: - arg0, arg1 = (IRnode.from_list(0, typ=target_type), range_call.args[0]) - elif args_len == 2: - arg0, arg1 = range_call.args - else: # pragma: nocover - raise TypeCheckFailure("unreachable: bad # of arguments to range()") with self.context.range_scope(): - start = Expr.parse_value_expr(arg0, self.context) - end = Expr.parse_value_expr(arg1, self.context) + args = [Expr.parse_value_expr(arg, self.context) for arg in range_call.args] + if len(args) == 1: + start = IRnode.from_list(0, typ=target_type) + end = args[0] + elif len(args) == 2: + start, end = args + else: # pragma: nocover + raise TypeCheckFailure("unreachable") + kwargs = { s.arg: Expr.parse_value_expr(s.value, self.context) for s in range_call.keywords } @@ -300,8 +298,8 @@ def _parse_For_list(self): def parse_AugAssign(self): target = self._get_target(self.stmt.target) + right = Expr.parse_value_expr(self.stmt.value, self.context) - sub = Expr.parse_value_expr(self.stmt.value, self.context) if not target.typ._is_prim_word: # because of this check, we do not need to check for # make_setter references lhs<->rhs as in parse_Assign - @@ -309,20 +307,9 @@ def parse_AugAssign(self): raise TypeCheckFailure("unreachable") with target.cache_when_complex("_loc") as (b, target): - rhs = Expr.parse_value_expr( - vy_ast.BinOp( - left=IRnode.from_list(LOAD(target), typ=target.typ), - right=sub, - op=self.stmt.op, - lineno=self.stmt.lineno, - col_offset=self.stmt.col_offset, - end_lineno=self.stmt.end_lineno, - end_col_offset=self.stmt.end_col_offset, - node_source_code=self.stmt.get("node_source_code"), - ), - self.context, - ) - return b.resolve(STORE(target, rhs)) + left = IRnode.from_list(LOAD(target), typ=target.typ) + new_val = Expr.handle_binop(self.stmt.op, left, right, self.context) + return b.resolve(STORE(target, new_val)) def parse_Continue(self): return IRnode.from_list("continue") diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 707c99291b..de8e34370d 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -1,9 +1,7 @@ import warnings -from collections import OrderedDict, deque +from collections import deque from pathlib import PurePath -import asttokens - from vyper.ast import ast_to_dict, parse_natspec from vyper.codegen.ir_node import IRnode from vyper.compiler.phases import CompilerData @@ -237,46 +235,72 @@ def _build_asm(asm_list): return output_string -def build_source_map_output(compiler_data: CompilerData) -> OrderedDict: - _, line_number_map = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_compiler_metadata=False - ) - # Sort line_number_map - out = OrderedDict() - for k in sorted(line_number_map.keys()): - out[k] = line_number_map[k] +def _build_node_identifier(ast_node): + assert ast_node.module_node is not None, type(ast_node) + return (ast_node.module_node.source_id, ast_node.node_id) - out["pc_pos_map_compressed"] = _compress_source_map( - compiler_data.source_code, out["pc_pos_map"], out["pc_jump_map"], compiler_data.source_id + +def build_source_map_output(compiler_data: CompilerData) -> dict: + """ + Generate source map output in various formats. Note that integrations + are encouraged to use pc_ast_map since the information it provides is + a superset of the other formats, and the other types are included + for legacy reasons. + """ + bytecode, pc_maps = compile_ir.assembly_to_evm( + compiler_data.assembly_runtime, insert_compiler_metadata=False ) - out["pc_pos_map"] = dict((k, v) for k, v in out["pc_pos_map"].items() if v) + # sort the pc maps alphabetically + # CMC 2024-03-09 is this really necessary? + out = {} + for k in sorted(pc_maps.keys()): + out[k] = pc_maps[k] + + ast_map = out.pop("pc_raw_ast_map") + + assert isinstance(ast_map, dict) # lint + if 0 not in ast_map: + # tag it with source id + ast_map[0] = compiler_data.annotated_vyper_module + + pc_pos_map = {k: compile_ir.getpos(v) for (k, v) in ast_map.items()} + node_id_map = {k: _build_node_identifier(v) for (k, v) in ast_map.items()} + compressed_map = _compress_source_map(ast_map, out["pc_jump_map"], len(bytecode)) + out["pc_pos_map_compressed"] = compressed_map + out["pc_pos_map"] = pc_pos_map + out["pc_ast_map"] = node_id_map + # hint to consumers what the fields in pc_ast_map mean + out["pc_ast_map_item_keys"] = ("source_id", "node_id") return out -def _compress_source_map(code, pos_map, jump_map, source_id): - linenos = asttokens.LineNumbers(code) - ret = [f"-1:-1:{source_id}:-"] - last_pos = [-1, -1, source_id] +# generate a solidity-style source map. this functionality is deprecated +# in favor of pc_ast_map, and may not be maintained to the same level +# as pc_ast_map. +def _compress_source_map(ast_map, jump_map, bytecode_size): + ret = [] - for pc in sorted(pos_map)[1:]: - current_pos = [-1, -1, source_id] - if pos_map[pc]: - current_pos[0] = linenos.line_to_offset(*pos_map[pc][:2]) - current_pos[1] = linenos.line_to_offset(*pos_map[pc][2:]) - current_pos[0] + jump_map = jump_map.copy() + ast_map = ast_map.copy() - if pc in jump_map: - current_pos.append(jump_map[pc]) + for pc in range(bytecode_size): + if pc in ast_map: + ast_node = ast_map.pop(pc) + # ast_node.src conveniently has the current position in + # the correct, compressed format + current_pos = [ast_node.src] + else: + current_pos = ["-1:-1:-1"] - for i in range(2, -1, -1): - if current_pos[i] != last_pos[i]: - last_pos[i] = current_pos[i] - elif len(current_pos) == i + 1: - current_pos.pop() - else: - current_pos[i] = "" + if pc in jump_map: + jump_type = jump_map.pop(pc) + current_pos.append(jump_type) ret.append(":".join(str(i) for i in current_pos)) + assert len(ast_map) == 0, ast_map + assert len(jump_map) == 0, jump_map + return ";".join(ret) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index ac8631ff7b..e4a4cc60f7 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -54,8 +54,8 @@ def mksymbol(name=""): return f"_sym_{name}{_next_symbol}" -def mkdebug(pc_debugger, source_pos): - i = Instruction("DEBUG", source_pos) +def mkdebug(pc_debugger, ast_source): + i = Instruction("DEBUG", ast_source) i.pc_debugger = pc_debugger return [i] @@ -133,7 +133,7 @@ def _rewrite_return_sequences(ir_node, label_params=None): # works for both internal and external exit_to more_args = ["pass" if t.value == "return_pc" else t for t in args[1:]] _t.append(["goto", dest] + more_args) - ir_node.args = IRnode.from_list(_t, source_pos=ir_node.source_pos).args + ir_node.args = IRnode.from_list(_t, ast_source=ir_node.ast_source).args if ir_node.value == "label": label_params = set(t.value for t in ir_node.args[1].args) @@ -187,14 +187,11 @@ class Instruction(str): def __new__(cls, sstr, *args, **kwargs): return super().__new__(cls, sstr) - def __init__(self, sstr, source_pos=None, error_msg=None): + def __init__(self, sstr, ast_source=None, error_msg=None): self.error_msg = error_msg self.pc_debugger = False - if source_pos is not None: - self.lineno, self.col_offset, self.end_lineno, self.end_col_offset = source_pos - else: - self.lineno, self.col_offset, self.end_lineno, self.end_col_offset = [None] * 4 + self.ast_source = ast_source def apply_line_numbers(func): @@ -204,7 +201,7 @@ def apply_line_no_wrapper(*args, **kwargs): ret = func(*args, **kwargs) new_ret = [ - Instruction(i, code.source_pos, code.error_msg) + Instruction(i, code.ast_source, code.error_msg) if isinstance(i, str) and not isinstance(i, Instruction) else i for i in ret @@ -765,37 +762,38 @@ def _height_of(witharg): # inject debug opcode. elif code.value == "debugger": - return mkdebug(pc_debugger=False, source_pos=code.source_pos) + return mkdebug(pc_debugger=False, ast_source=code.ast_source) # inject debug opcode. elif code.value == "pc_debugger": - return mkdebug(pc_debugger=True, source_pos=code.source_pos) + return mkdebug(pc_debugger=True, ast_source=code.ast_source) else: # pragma: no cover raise ValueError(f"Weird code element: {type(code)} {code}") -def note_line_num(line_number_map, item, pos): - # Record line number attached to pos. - if isinstance(item, Instruction): - if item.lineno is not None: - offsets = (item.lineno, item.col_offset, item.end_lineno, item.end_col_offset) - else: - offsets = None +def getpos(node): + return (node.lineno, node.col_offset, node.end_lineno, node.end_col_offset) - line_number_map["pc_pos_map"][pos] = offsets + +def note_line_num(line_number_map, pc, item): + # Record AST attached to pc + if isinstance(item, Instruction): + if (ast_node := item.ast_source) is not None: + ast_node = ast_node.get_original_node() + if hasattr(ast_node, "node_id"): + line_number_map["pc_raw_ast_map"][pc] = ast_node if item.error_msg is not None: - line_number_map["error_map"][pos] = item.error_msg + line_number_map["error_map"][pc] = item.error_msg - added_line_breakpoint = note_breakpoint(line_number_map, item, pos) - return added_line_breakpoint + note_breakpoint(line_number_map, pc, item) -def note_breakpoint(line_number_map, item, pos): - # Record line number attached to pos. +def note_breakpoint(line_number_map, pc, item): + # Record line number attached to pc if item == "DEBUG": # Is PC debugger, create PC breakpoint. if item.pc_debugger: - line_number_map["pc_breakpoints"].add(pos) + line_number_map["pc_breakpoints"].add(pc) # Create line number breakpoint. else: line_number_map["breakpoints"].add(item.lineno + 1) @@ -1064,7 +1062,7 @@ def adjust_pc_maps(pc_maps, ofst): ret["breakpoints"] = pc_maps["breakpoints"].copy() ret["pc_breakpoints"] = {pc + ofst for pc in pc_maps["pc_breakpoints"]} ret["pc_jump_map"] = {k + ofst: v for (k, v) in pc_maps["pc_jump_map"].items()} - ret["pc_pos_map"] = {k + ofst: v for (k, v) in pc_maps["pc_pos_map"].items()} + ret["pc_raw_ast_map"] = {k + ofst: v for (k, v) in pc_maps["pc_raw_ast_map"].items()} ret["error_map"] = {k + ofst: v for (k, v) in pc_maps["error_map"].items()} return ret @@ -1171,7 +1169,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat "breakpoints": set(), "pc_breakpoints": set(), "pc_jump_map": {0: "-"}, - "pc_pos_map": {}, + "pc_raw_ast_map": {}, "error_map": {}, } @@ -1213,7 +1211,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat # go through the code, resolving symbolic locations # (i.e. JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): - note_line_num(line_number_map, item, pc) + note_line_num(line_number_map, pc, item) if item == "DEBUG": continue # skip debug diff --git a/vyper/ir/optimizer.py b/vyper/ir/optimizer.py index 75e9b46783..7ff5390e4b 100644 --- a/vyper/ir/optimizer.py +++ b/vyper/ir/optimizer.py @@ -436,7 +436,7 @@ def _optimize(node: IRnode, parent: Optional[IRnode]) -> Tuple[bool, IRnode]: value = node.value typ = node.typ location = node.location - source_pos = node.source_pos + ast_source = node.ast_source error_msg = node.error_msg annotation = node.annotation add_gas_estimate = node.add_gas_estimate @@ -460,7 +460,7 @@ def finalize(val, args): ir_builder, typ=typ, location=location, - source_pos=source_pos, + ast_source=ast_source, error_msg=error_msg, annotation=annotation, add_gas_estimate=add_gas_estimate, @@ -552,7 +552,7 @@ def finalize(val, args): if _evm_int(argz[0]) == 0: raise StaticAssertionException( f"assertion found to fail at compile time. (hint: did you mean `raise`?) {node}", - source_pos, + ast_source, ) else: changed = True @@ -615,7 +615,7 @@ def _merge_memzero(argz): changed = True new_ir = IRnode.from_list( ["calldatacopy", initial_offset, "calldatasize", total_length], - source_pos=mstore_nodes[0].source_pos, + ast_source=mstore_nodes[0].ast_source, ) # replace first zero'ing operation with optimized node and remove the rest argz[idx] = new_ir @@ -658,7 +658,7 @@ def _rewrite_mstore_dload(argz): dst = arg.args[0] src = arg.args[1].args[0] len_ = 32 - argz[i] = IRnode.from_list(["dloadbytes", dst, src, len_], source_pos=arg.source_pos) + argz[i] = IRnode.from_list(["dloadbytes", dst, src, len_], ast_source=arg.ast_source) changed = True return changed @@ -716,7 +716,7 @@ def _merge_load(argz, _LOAD, _COPY, allow_overlap=True): changed = True new_ir = IRnode.from_list( [_COPY, initial_dst_offset, initial_src_offset, total_length], - source_pos=mstore_nodes[0].source_pos, + ast_source=mstore_nodes[0].ast_source, ) # replace first copy operation with optimized node and remove the rest argz[idx] = new_ir diff --git a/vyper/semantics/analysis/getters.py b/vyper/semantics/analysis/getters.py index bce64987da..ad5c8227cb 100644 --- a/vyper/semantics/analysis/getters.py +++ b/vyper/semantics/analysis/getters.py @@ -63,8 +63,7 @@ def generate_public_variable_getters(vyper_module: vy_ast.Module) -> None: # after iterating the input types, the remaining annotation node is our return type return_annotation = copy.copy(annotation) - # join everything together as a new `FunctionDef` node, annotate it - # with the type, and append it to the existing `Module` node + # join everything together as a new `FunctionDef` node expanded = vy_ast.FunctionDef( name=funcname, args=vy_ast.arguments(args=input_nodes, defaults=[]),