Skip to content

Commit

Permalink
feat[tool]: add node_id map to source map (#3811)
Browse files Browse the repository at this point in the history
This commit adds a new, AST-based map to the source map which links
program counters (pcs) directly back to the AST output. This should
improve the ability of third parties to implement source code
integrations (debuggers, storage map tracers, etc.).

refactors:
- get rid of `vyper.codegen.core.getpos()`
- rename `IRnode.source_pos` to `IRnode.ast_source`
- refactor a couple places in codegen which were passing `IRnode`s to
  the `Expr` constructor
- rewrote the source map compression routine a bit. it may have been
  broken in the process, but at this point the compressed source map
  does not seem to be widely used.
  • Loading branch information
charles-cooper authored Mar 12, 2024
1 parent 39027dc commit 246f4a7
Show file tree
Hide file tree
Showing 17 changed files with 239 additions and 171 deletions.
5 changes: 4 additions & 1 deletion docs/compiling-a-contract.rst
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,14 @@ The following example describes the expected input format of ``vyper-json``. Com
// evm.bytecode.opcodes - Opcodes list
// evm.deployedBytecode.object - Deployed bytecode object
// evm.deployedBytecode.opcodes - Deployed opcodes list
// evm.deployedBytecode.sourceMap - Deployed source mapping (useful for debugging)
// evm.deployedBytecode.sourceMap - Solidity-style source mapping
// evm.deployedBytecode.sourceMapFull - Deployed source mapping (useful for debugging)
// evm.methodIdentifiers - The list of function hashes
//
// Using `evm`, `evm.bytecode`, etc. will select every target part of that output.
// Additionally, `*` can be used as a wildcard to request everything.
// Note that the sourceMapFull.pc_ast_map is the recommended source map to use;
// the other types are included for legacy and compatibility reasons.
//
"outputSelection": {
"*": ["evm.bytecode", "abi"], // Enable the abi and bytecode outputs for every single contract
Expand Down
26 changes: 20 additions & 6 deletions tests/unit/cli/vyper_json/test_compile_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,11 @@ def test_compile_json(input_json, input_bundle):
for source_id, contract_name in [(0, "foo"), (2, "library"), (3, "bar")]:
path = f"contracts/{contract_name}.vy"
data = compile_code_results[path]
assert output_json["sources"][path] == {"id": source_id, "ast": data["ast_dict"]["ast"]}
assert output_json["sources"][path] == {
"id": source_id,
"ast": data["ast_dict"]["ast"],
"annotated_ast": data["annotated_ast_dict"]["ast"],
}
assert output_json["contracts"][path][contract_name] == {
"abi": data["abi"],
"devdoc": data["devdoc"],
Expand Down Expand Up @@ -260,15 +264,25 @@ def test_exc_handler_to_dict_compiler(input_json):


def test_source_ids_increment(input_json):
input_json["settings"]["outputSelection"] = {"*": ["evm.deployedBytecode.sourceMap"]}
input_json["settings"]["outputSelection"] = {"*": ["ast", "evm.deployedBytecode.sourceMapFull"]}
result = compile_json(input_json)

def get(filename, contractname):
return result["contracts"][filename][contractname]["evm"]["deployedBytecode"]["sourceMap"]
ast = result["sources"][filename]["ast"]
ret = ast["source_id"]

# grab it via source map to sanity check
contract_info = result["contracts"][filename][contractname]["evm"]
pc_ast_map = contract_info["deployedBytecode"]["sourceMapFull"]["pc_ast_map"]
pc_item = next(iter(pc_ast_map.values()))
source_id, node_id = pc_item
assert ret == source_id

return ret

assert get("contracts/foo.vy", "foo").startswith("-1:-1:0")
assert get("contracts/library.vy", "library").startswith("-1:-1:2")
assert get("contracts/bar.vy", "bar").startswith("-1:-1:3")
assert get("contracts/foo.vy", "foo") == 0
assert get("contracts/library.vy", "library") == 2
assert get("contracts/bar.vy", "bar") == 3


def test_relative_import_paths(input_json):
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/cli/vyper_json/test_output_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ def test_star():
assert result == {PurePath("foo.vy"): expected, PurePath("bar.vy"): expected}


def test_ast():
    """Check that the ``ast`` and ``annotated_ast`` selections are translated.

    Both JSON output-selection keys are requested for a single source, and
    the result is expected to be the sorted list of their ``TRANSLATE_MAP``
    values, keyed by the source path.
    """
    input_json = {
        "sources": {"foo.vy": ""},
        "settings": {"outputSelection": {"foo.vy": ["ast", "annotated_ast"]}},
    }
    expected = sorted(TRANSLATE_MAP[k] for k in ("ast", "annotated_ast"))
    result = get_output_formats(input_json)
    assert result == {PurePath("foo.vy"): expected}


def test_evm():
input_json = {
"sources": {"foo.vy": ""},
Expand Down
58 changes: 47 additions & 11 deletions tests/unit/compiler/test_source_map.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
from collections import namedtuple

from vyper.compiler import compile_code
from vyper.compiler.output import _compress_source_map
from vyper.compiler.utils import expand_source_map

TEST_CODE = """
x: public(uint256)
@internal
def _baz(a: int128) -> int128:
b: int128 = a
for i: int128 in range(2, 5):
b *= i
if b > 31337:
if b > 31336 + 1:
break
return b
Expand Down Expand Up @@ -82,26 +86,58 @@ def update_foo():


def test_compress_source_map():
code = """
@external
def foo() -> uint256:
return 42
"""
# mock the required VyperNode fields in compress_source_map
# fake_node = namedtuple("fake_node", ("lineno", "col_offset", "end_lineno", "end_col_offset"))
fake_node = namedtuple("fake_node", ["src"])

compressed = _compress_source_map(
code, {"0": None, "2": (2, 0, 4, 13), "3": (2, 0, 2, 8), "5": (2, 0, 2, 8)}, {"3": "o"}, 2
{2: fake_node("-1:-1:-1"), 3: fake_node("1:45"), 5: fake_node("45:49")}, {3: "o"}, 6
)
assert compressed == "-1:-1:2:-;1:45;:8::o;"
assert compressed == "-1:-1:-1;-1:-1:-1;-1:-1:-1;1:45:o;-1:-1:-1;45:49"


def test_expand_source_map():
compressed = "-1:-1:0:-;;13:42:1;:21;::0:o;:::-;1::1;"
compressed = "13:42:1;:21;::0:o;:::-;1::1;"
expanded = [
[-1, -1, 0, "-"],
[-1, -1, 0, None],
[13, 42, 1, None],
[13, 21, 1, None],
[13, 21, 0, "o"],
[13, 21, 0, "-"],
[1, 21, 1, None],
]
assert expand_source_map(compressed) == expanded


def _construct_node_id_map(ast_struct):
    """Recursively index a JSON-like AST structure by ``node_id``.

    Walks nested dicts and lists. Every dict that carries a ``"node_id"``
    key is recorded in the result under that id, and the values of every
    dict (and the items of every list) are searched recursively.

    Args:
        ast_struct: a dict, a list, or any other value (leaf).

    Returns:
        A dict mapping each ``node_id`` found to the dict that contains it.
        Leaves (anything that is neither a dict nor a list) yield ``{}``.
    """
    if isinstance(ast_struct, dict):
        ret = {}
        # register this node first; entries found in children are merged
        # afterwards, so a nested node with the same id would override it.
        if "node_id" in ast_struct:
            ret[ast_struct["node_id"]] = ast_struct
        for item in ast_struct.values():
            ret.update(_construct_node_id_map(item))
        return ret

    if isinstance(ast_struct, list):
        ret = {}
        for item in ast_struct:
            ret.update(_construct_node_id_map(item))
        return ret

    # scalars / None: nothing to index
    return {}


def test_node_id_map():
    """Check that every ``pc_ast_map`` entry refers to a node in the annotated AST.

    Compiles ``TEST_CODE``, then verifies the advertised item layout of the
    map and that each entry is an integer ``(source_id, node_id)`` pair whose
    ``node_id`` can be found in the ``annotated_ast_dict`` output.
    """
    code = TEST_CODE
    out = compile_code(code, output_formats=["annotated_ast_dict", "source_map", "ir"])
    assert out["source_map"]["pc_ast_map_item_keys"] == ("source_id", "node_id")

    pc_ast_map = out["source_map"]["pc_ast_map"]

    # index the annotated AST by node_id so map entries can be looked up
    ast_node_map = _construct_node_id_map(out["annotated_ast_dict"])

    for pc, (source_id, node_id) in pc_ast_map.items():
        assert isinstance(pc, int), pc
        assert isinstance(source_id, int), source_id
        assert isinstance(node_id, int), node_id
        assert node_id in ast_node_map
2 changes: 1 addition & 1 deletion vyper/ast/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def _to_node(obj, parent):
if isinstance(obj, VyperNode):
# if object is already a vyper node, make sure the parent is set correctly
# and fix any missing source offsets
obj._parent = parent
obj.set_parent(parent)
for field_name in NODE_SRC_ATTRIBUTES:
if getattr(obj, field_name) is None:
setattr(obj, field_name, getattr(parent, field_name, None))
Expand Down
4 changes: 4 additions & 0 deletions vyper/ast/nodes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ def get_node(
class VyperNode:
full_source_code: str = ...
node_source_code: str = ...
lineno: int = ...
col_offset: int = ...
end_lineno: int = ...
end_col_offset: int = ...
_metadata: dict = ...
_original_node: Optional[VyperNode] = ...
def __init__(self, parent: Optional[VyperNode] = ..., **kwargs: Any) -> None: ...
Expand Down
8 changes: 4 additions & 4 deletions vyper/builtins/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from vyper.semantics.types.module import ModuleT


def _strip_source_pos(ir_node):
ir_node.source_pos = None
def _strip_ast_source(ir_node):
ir_node.ast_source = None
for x in ir_node.args:
_strip_source_pos(x)
_strip_ast_source(x)


def generate_inline_function(code, variables, variables_2, memory_allocator):
Expand Down Expand Up @@ -38,5 +38,5 @@ def generate_inline_function(code, variables, variables_2, memory_allocator):
# NOTE if we ever use this for inlining user-code, it would make
# sense to fix the offsets of the source positions in the generated
# code instead of stripping them.
_strip_source_pos(generated_ir)
_strip_ast_source(generated_ir)
return new_context, generated_ir
9 changes: 7 additions & 2 deletions vyper/cli/vyper_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
TRANSLATE_MAP = {
"abi": "abi",
"ast": "ast_dict",
"annotated_ast": "annotated_ast_dict",
"devdoc": "devdoc",
"evm.methodIdentifiers": "method_identifiers",
"evm.bytecode.object": "bytecode",
Expand Down Expand Up @@ -313,8 +314,12 @@ def format_to_output_dict(compiler_data: dict) -> dict:
for path, data in compiler_data.items():
path = str(path) # Path breaks json serializability
output_dict["sources"][path] = {"id": data["source_id"]}
if "ast_dict" in data:
output_dict["sources"][path]["ast"] = data["ast_dict"]["ast"]

for k in ("ast_dict", "annotated_ast_dict"):
if k in data:
# un-translate the key
k2 = k.removesuffix("_dict")
output_dict["sources"][path][k2] = data[k]["ast"]

name = PurePath(path).stem
output_dict["contracts"][path] = {name: {}}
Expand Down
9 changes: 0 additions & 9 deletions vyper/codegen/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,15 +432,6 @@ def pop_dyn_array(darray_node, return_popped_item):
return IRnode.from_list(b1.resolve(b2.resolve(ret)), typ=typ, location=location)


def getpos(node):
    """Return the source position of ``node`` as a 4-tuple.

    Args:
        node: an object exposing ``lineno`` and ``col_offset`` and,
            optionally, ``end_lineno`` and ``end_col_offset``.

    Returns:
        ``(lineno, col_offset, end_lineno, end_col_offset)``; the two end
        fields are ``None`` when the node does not define them.

    Raises:
        AttributeError: if ``node`` has no ``lineno`` or ``col_offset``.
    """
    return (
        node.lineno,
        node.col_offset,
        getattr(node, "end_lineno", None),
        getattr(node, "end_col_offset", None),
    )


# add an offset to a pointer, keeping location and encoding info
def add_ofst(ptr, ofst):
ret = ["add", ptr, ofst]
Expand Down
43 changes: 21 additions & 22 deletions vyper/codegen/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
ensure_in_memory,
get_dyn_array_count,
get_element_ptr,
getpos,
is_array_like,
is_bytes_m_type,
is_flag_type,
Expand Down Expand Up @@ -72,13 +71,6 @@ class Expr:
# TODO: Once other refactors are made reevaluate all inline imports

def __init__(self, node, context, is_stmt=False):
if isinstance(node, IRnode):
# this is a kludge for parse_AugAssign to pass in IRnodes
# directly.
# TODO fixme!
self.ir_node = node
return

assert isinstance(node, vy_ast.VyperNode)
if node.has_folded_value:
node = node.get_folded_value()
Expand All @@ -94,7 +86,7 @@ def __init__(self, node, context, is_stmt=False):
assert isinstance(self.ir_node, IRnode), self.ir_node

self.ir_node.annotation = self.expr.get("node_source_code")
self.ir_node.source_pos = getpos(self.expr)
self.ir_node.ast_source = self.expr

def parse_Int(self):
typ = self.expr._metadata["type"]
Expand Down Expand Up @@ -382,7 +374,14 @@ def parse_BinOp(self):
left = Expr.parse_value_expr(self.expr.left, self.context)
right = Expr.parse_value_expr(self.expr.right, self.context)

is_shift_op = isinstance(self.expr.op, (vy_ast.LShift, vy_ast.RShift))
return Expr.handle_binop(self.expr.op, left, right, self.context)

@classmethod
def handle_binop(cls, op, left, right, context):
assert not left.is_pointer
assert not right.is_pointer

is_shift_op = isinstance(op, (vy_ast.LShift, vy_ast.RShift))

if is_shift_op:
assert is_numeric_type(left.typ)
Expand All @@ -391,25 +390,25 @@ def parse_BinOp(self):
# Sanity check - ensure that we aren't dealing with different types
# This should be unreachable due to the type check pass
if left.typ != right.typ:
raise TypeCheckFailure(f"unreachable, {left.typ} != {right.typ}", self.expr)
raise TypeCheckFailure(f"unreachable: {left.typ} != {right.typ}")
assert is_numeric_type(left.typ) or is_flag_type(left.typ)

out_typ = left.typ

if isinstance(self.expr.op, vy_ast.BitAnd):
if isinstance(op, vy_ast.BitAnd):
return IRnode.from_list(["and", left, right], typ=out_typ)
if isinstance(self.expr.op, vy_ast.BitOr):
if isinstance(op, vy_ast.BitOr):
return IRnode.from_list(["or", left, right], typ=out_typ)
if isinstance(self.expr.op, vy_ast.BitXor):
if isinstance(op, vy_ast.BitXor):
return IRnode.from_list(["xor", left, right], typ=out_typ)

if isinstance(self.expr.op, vy_ast.LShift):
if isinstance(op, vy_ast.LShift):
new_typ = left.typ
if new_typ.bits != 256:
# TODO implement me. ["and", 2**bits - 1, shl(right, left)]
raise TypeCheckFailure("unreachable")
return IRnode.from_list(shl(right, left), typ=new_typ)
if isinstance(self.expr.op, vy_ast.RShift):
if isinstance(op, vy_ast.RShift):
new_typ = left.typ
if new_typ.bits != 256:
# TODO implement me. promote_signed_int(op(right, left), bits)
Expand All @@ -421,17 +420,17 @@ def parse_BinOp(self):
assert is_numeric_type(left.typ)

with left.cache_when_complex("x") as (b1, x), right.cache_when_complex("y") as (b2, y):
if isinstance(self.expr.op, vy_ast.Add):
if isinstance(op, vy_ast.Add):
ret = arithmetic.safe_add(x, y)
elif isinstance(self.expr.op, vy_ast.Sub):
elif isinstance(op, vy_ast.Sub):
ret = arithmetic.safe_sub(x, y)
elif isinstance(self.expr.op, vy_ast.Mult):
elif isinstance(op, vy_ast.Mult):
ret = arithmetic.safe_mul(x, y)
elif isinstance(self.expr.op, (vy_ast.Div, vy_ast.FloorDiv)):
elif isinstance(op, (vy_ast.Div, vy_ast.FloorDiv)):
ret = arithmetic.safe_div(x, y)
elif isinstance(self.expr.op, vy_ast.Mod):
elif isinstance(op, vy_ast.Mod):
ret = arithmetic.safe_mod(x, y)
elif isinstance(self.expr.op, vy_ast.Pow):
elif isinstance(op, vy_ast.Pow):
ret = arithmetic.safe_pow(x, y)
else: # pragma: nocover
raise CompilerPanic("Unreachable")
Expand Down
12 changes: 6 additions & 6 deletions vyper/codegen/function_definitions/external_function.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from vyper.codegen.abi_encoder import abi_encoding_matches_vyper
from vyper.codegen.context import Context, VariableRecord
from vyper.codegen.core import get_element_ptr, getpos, make_setter, needs_clamp
from vyper.codegen.core import get_element_ptr, make_setter, needs_clamp
from vyper.codegen.expr import Expr
from vyper.codegen.function_definitions.common import (
EntryPointInfo,
Expand Down Expand Up @@ -39,7 +39,7 @@ def _register_function_args(func_t: ContractFunctionT, context: Context) -> list
dst = IRnode(p, typ=arg.typ, location=MEMORY)

copy_arg = make_setter(dst, arg_ir)
copy_arg.source_pos = getpos(arg.ast_source)
copy_arg.ast_source = arg.ast_source
ret.append(copy_arg)
else:
assert abi_encoding_matches_vyper(arg.typ)
Expand Down Expand Up @@ -101,18 +101,18 @@ def handler_for(calldata_kwargs, default_kwargs):
rhs = get_element_ptr(calldata_kwargs_ofst, k, array_bounds_check=False)

copy_arg = make_setter(lhs, rhs)
copy_arg.source_pos = getpos(arg_meta.ast_source)
copy_arg.ast_source = arg_meta.ast_source
ret.append(copy_arg)

for x in default_kwargs:
dst = context.lookup_var(x.name).pos
lhs = IRnode(dst, location=MEMORY, typ=x.typ)
lhs.source_pos = getpos(x.ast_source)
lhs.ast_source = x.ast_source
kw_ast_val = func_t.default_values[x.name] # e.g. `3` in x: int = 3
rhs = Expr(kw_ast_val, context).ir_node

copy_arg = make_setter(lhs, rhs)
copy_arg.source_pos = getpos(x.ast_source)
copy_arg.ast_source = x.ast_source
ret.append(copy_arg)

ret.append(["goto", func_t._ir_info.external_function_base_entry_label])
Expand Down Expand Up @@ -210,7 +210,7 @@ def generate_ir_for_external_function(code, compilation_target):

# the ir which comprises the main body of the function,
# besides any kwarg handling
func_common_ir = IRnode.from_list(["seq", body, exit_], source_pos=getpos(code))
func_common_ir = IRnode.from_list(["seq", body, exit_], ast_source=code)

tag_frame_info(func_t, context)

Expand Down
Loading

0 comments on commit 246f4a7

Please sign in to comment.