From 9428196a0c48ec7cc02938ee4ae1a0e2fee133c8 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 9 Mar 2024 07:11:14 -0800 Subject: [PATCH 1/5] feat[tool]: improvements to AST annotation (#3829) this commit enriches the annotated AST output so that links between nodes are explicit and consumers of the AST do not need to "guess" about relationships. - link to the type declaration node for user types - add `sha256sum` field for `Module` nodes - add fields on `Import*` nodes so that they can be linked directly to `Module` nodes (including `sha256sum`, so that changes in dependencies can be detected) - add improved type information (including parsed out metadata, like array length) consistently across all types. misc/refactors: - add `to_dict()` for type objects - removed some dead code - `compare_nodes` was only used in a couple of tests, and node equality could be used there instead. (node equality is not super well-defined, but we could revisit that later). --- tests/unit/ast/nodes/test_binary.py | 2 +- tests/unit/ast/nodes/test_compare_nodes.py | 14 +- tests/unit/ast/nodes/test_from_node.py | 7 - tests/unit/ast/test_ast_dict.py | 1410 +++++++++++++++++--- vyper/ast/__init__.py | 2 +- vyper/ast/nodes.py | 72 +- vyper/ast/nodes.pyi | 1 - vyper/ast/parse.py | 5 +- vyper/builtins/_signatures.py | 2 + vyper/compiler/input_bundle.py | 6 + vyper/semantics/analysis/base.py | 22 +- vyper/semantics/analysis/module.py | 24 +- vyper/semantics/types/base.py | 40 +- vyper/semantics/types/bytestrings.py | 7 + vyper/semantics/types/function.py | 11 + vyper/semantics/types/module.py | 28 +- vyper/semantics/types/primitives.py | 6 + vyper/semantics/types/subscriptable.py | 15 +- vyper/semantics/types/user.py | 9 + vyper/utils.py | 7 + 20 files changed, 1448 insertions(+), 242 deletions(-) diff --git a/tests/unit/ast/nodes/test_binary.py b/tests/unit/ast/nodes/test_binary.py index 069101d7ff..d7662bc4bb 100644 --- a/tests/unit/ast/nodes/test_binary.py +++ b/tests/unit/ast/nodes/test_binary.py @@ -18,7 +18,7 @@ def x(): """ ) - assert vy_ast.compare_nodes(expected, mutated) + assert expected == mutated def test_binary_length(): diff --git a/tests/unit/ast/nodes/test_compare_nodes.py b/tests/unit/ast/nodes/test_compare_nodes.py index 73dc319203..164cd3d371 100644 --- a/tests/unit/ast/nodes/test_compare_nodes.py +++ b/tests/unit/ast/nodes/test_compare_nodes.py @@ -7,7 +7,6 @@ def test_compare_different_node_clases(): right = vyper_ast.body[0].value assert left != right - assert not vy_ast.compare_nodes(left, right) def test_compare_different_nodes_same_class(): @@ -15,7 +14,6 @@ def test_compare_different_nodes_same_class(): left, right = vyper_ast.body[0].value.elements assert left != right - assert not vy_ast.compare_nodes(left, right) def test_compare_different_nodes_same_value(): @@ -23,15 +21,14 @@ def test_compare_different_nodes_same_value(): left, right = vyper_ast.body[0].value.elements assert left != right - assert vy_ast.compare_nodes(left, right) -def test_compare_complex_nodes_same_value(): - vyper_ast = vy_ast.parse_to_ast("[{'foo':'bar', 43:[1,2,3]}, {'foo':'bar', 43:[1,2,3]}]") - left, right = vyper_ast.body[0].value.elements +def test_compare_similar_node(): + # test equality without node_ids + left = vy_ast.Int(value=1) + right = vy_ast.Int(value=1) - assert left != right - assert vy_ast.compare_nodes(left, right) + assert left == right def test_compare_same_node(): @@ -39,4 +36,3 @@ def test_compare_same_node(): node = vyper_ast.body[0].value assert node == node - assert vy_ast.compare_nodes(node, node) diff --git a/tests/unit/ast/nodes/test_from_node.py b/tests/unit/ast/nodes/test_from_node.py index 8a7922d582..8f4a50e729 100644 --- a/tests/unit/ast/nodes/test_from_node.py +++ b/tests/unit/ast/nodes/test_from_node.py @@ -24,13 +24,6 @@ def test_kwargs(): assert new_node.value == 666 -def test_compare_nodes(): - old_node = vy_ast.parse_to_ast("foo = 42") - new_node = vy_ast.Int.from_node(old_node, value=666) - - assert not vy_ast.compare_nodes(old_node, new_node) - - def test_new_node_has_no_parent(): old_node = vy_ast.parse_to_ast("foo = 42") new_node = vy_ast.Int.from_node(old_node, value=666) diff --git a/tests/unit/ast/test_ast_dict.py b/tests/unit/ast/test_ast_dict.py index 3f14e3d2f7..81c3dc46fa 100644 --- a/tests/unit/ast/test_ast_dict.py +++ b/tests/unit/ast/test_ast_dict.py @@ -58,6 +58,10 @@ def test_basic_ast(): "col_offset": 0, "end_col_offset": 9, "end_lineno": 2, + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, "lineno": 2, "node_id": 1, "src": "1:9:0", @@ -70,14 +74,10 @@ def test_basic_ast(): "lineno": 2, "node_id": 2, "src": "1:1:0", - "type": "int128", + "type": {"bits": 128, "is_signed": True, "name": "int128", "typeclass": "integer"}, }, + "type": {"bits": 128, "is_signed": True, "name": "int128", "typeclass": "integer"}, "value": None, - "is_constant": False, - "is_immutable": False, - "is_public": False, - "is_transient": False, - "type": "int128", } @@ -142,21 +142,1047 @@ def test() -> int128: # strip source annotations like lineno, we don't care for inspecting # the analysis result -def _strip_source_annotations(dict_node): - to_strip = NODE_SRC_ATTRIBUTES + ("node_id",) +def _strip_source_annotations(dict_node, to_strip): if isinstance(dict_node, dict): for k in list(dict_node.keys()): if k in to_strip: del dict_node[k] continue - _strip_source_annotations(dict_node[k]) + if "decl_node" not in k: + _strip_source_annotations(dict_node[k], to_strip) elif isinstance(dict_node, list): for child in dict_node: - _strip_source_annotations(child) + _strip_source_annotations(child, to_strip) + + +def test_output_type_info(make_input_bundle, chdir_tmp_path): + # test type info is output in the ast dict + # test different, complex types, and test import info is also output + lib1 = """ +struct Foo: + x: uint256 + +event Bar: + pass + +struct Baz: + x: decimal + y: Bytes[20] + z: String[32] + w: uint256 + u: address + +interface Qux: + def return_tuple() -> (Foo[1], uint256): nonpayable + +foo_var: Foo +sarray_var: Foo[1] +darray_var: DynArray[Foo, 5] +interface_var: Qux + +hashmap_var: HashMap[address, Foo] + +sarray_var2: uint256[2] +darray_var2: DynArray[uint256, 5] + +@internal +def foo(): + t: uint256 = max_value(uint256) + u: int24 = empty(int24) + + self.foo_var = empty(Foo) + self.sarray_var[0] = empty(Foo) + self.darray_var[1] = empty(Foo) + + self.sarray_var, t = extcall self.interface_var.return_tuple() + +@external +def bar(): + s: bytes24 = empty(bytes24) + """ + + main = """ +import lib1 + +initializes: lib1 + +@internal +def foo(): + lib1.foo() + log lib1.Bar() + s: lib1.Foo = empty(lib1.Foo) + """ + + input_bundle = make_input_bundle({"lib1.vy": lib1, "main.vy": main}) + + lib1_file = input_bundle.load_file("lib1.vy") + out = compiler.compile_from_file_input( + lib1_file, input_bundle=input_bundle, output_formats=["annotated_ast_dict"] + ) + lib1_ast = out["annotated_ast_dict"]["ast"] + lib1_sha256sum = lib1_ast.pop("source_sha256sum") + assert lib1_sha256sum == lib1_file.sha256sum + to_strip = NODE_SRC_ATTRIBUTES + ("resolved_path", "variable_reads", "variable_writes") + _strip_source_annotations(lib1_ast, to_strip=to_strip) + + main_file = input_bundle.load_file("main.vy") + out = compiler.compile_from_file_input( + main_file, input_bundle=input_bundle, output_formats=["annotated_ast_dict"] + ) + main_ast = out["annotated_ast_dict"]["ast"] + main_sha256sum = main_ast.pop("source_sha256sum") + assert main_sha256sum == main_file.sha256sum + _strip_source_annotations(main_ast, to_strip=to_strip) + + # TODO: would be nice to refactor this into bunch of small test cases + assert main_ast == { + "ast_type": "Module", + "body": [ + { + "alias": None, + "ast_type": "Import", + "import_info": { + "alias": "lib1", + "file_sha256sum": lib1_file.sha256sum, + "path": "lib1.vy", + "qualified_module_name": "lib1", + "source_id": 0, + }, + "name": "lib1", + "node_id": 1, + }, + { + "annotation": {"ast_type": "Name", "id": "lib1", "node_id": 6}, + "ast_type": "InitializesDecl", + "node_id": 3, + }, + { + "args": { + "args": [], + "ast_type": "arguments", + "default": None, + "defaults": [], + "node_id": 9, + }, + "ast_type": "FunctionDef", + "body": [ + { + "ast_type": "Expr", + "node_id": 10, + "value": { + "args": [], + "ast_type": "Call", + "func": { + "ast_type": "Attribute", + "attr": "foo", + "node_id": 12, + "type": { + "name": "foo", + "type_decl_node": {"node_id": 119, "source_id": 0}, + "typeclass": "contract_function", + }, + "value": { + "ast_type": "Name", + "id": "lib1", + "node_id": 13, + "type": { + "name": "lib1.vy", + "type_decl_node": {"node_id": 0, "source_id": 0}, + "typeclass": "module", + }, + }, + }, + "keywords": [], + "node_id": 11, + "type": {"name": "(void)"}, + }, + }, + { + "ast_type": "Log", + "node_id": 17, + "type": { + "name": "Bar", + "type_decl_node": {"node_id": 7, "source_id": 0}, + "typeclass": "event", + }, + "value": { + "args": [], + "ast_type": "Call", + "func": { + "ast_type": "Attribute", + "attr": "Bar", + "node_id": 19, + "type": { + "type_t": { + "name": "Bar", + "type_decl_node": {"node_id": 7, "source_id": 0}, + "typeclass": "event", + } + }, + "value": { + "ast_type": "Name", + "id": "lib1", + "node_id": 20, + "type": { + "name": "lib1.vy", + "type_decl_node": {"node_id": 0, "source_id": 0}, + "typeclass": "module", + }, + }, + }, + "keywords": [], + "node_id": 18, + "type": {"name": "(void)"}, + }, + }, + { + "annotation": { + "ast_type": "Attribute", + "attr": "Foo", + "node_id": 26, + "value": {"ast_type": "Name", "id": "lib1", "node_id": 27}, + }, + "ast_type": "AnnAssign", + "node_id": 23, + "target": { + "ast_type": "Name", + "id": "s", + "node_id": 24, + "type": {"name": "Foo", "typeclass": "struct"}, + }, + "value": { + "args": [ + { + "ast_type": "Attribute", + "attr": "Foo", + "node_id": 33, + "type": {"type_t": {"name": "Foo", "typeclass": "struct"}}, + "value": { + "ast_type": "Name", + "id": "lib1", + "node_id": 34, + "type": { + "name": "lib1.vy", + "type_decl_node": {"node_id": 0, "source_id": 0}, + "typeclass": "module", + }, + }, + } + ], + "ast_type": "Call", + "func": { + "ast_type": "Name", + "id": "empty", + "node_id": 31, + "type": {"name": "empty", "typeclass": "builtin_function"}, + }, + "keywords": [], + "node_id": 30, + "type": {"name": "Foo", "typeclass": "struct"}, + }, + }, + ], + "decorator_list": [{"ast_type": "Name", "id": "internal", "node_id": 37}], + "doc_string": None, + "name": "foo", + "node_id": 8, + "pos": None, + "returns": None, + }, + ], + "doc_string": None, + "name": None, + "node_id": 0, + "path": "main.vy", + "source_id": 1, + "type": { + "name": "main.vy", + "type_decl_node": {"node_id": 0, "source_id": 1}, + "typeclass": "module", + }, + } + + # TODO: would be nice to refactor this into bunch of small test cases + # TODO: write the test in a way which makes the links between nodes + # clearer + assert lib1_ast == { + "ast_type": "Module", + "body": [ + { + "ast_type": "StructDef", + "body": [ + { + "annotation": {"ast_type": "Name", "id": "uint256", "node_id": 5}, + "ast_type": "AnnAssign", + "node_id": 2, + "target": {"ast_type": "Name", "id": "x", "node_id": 3}, + "value": None, + } + ], + "doc_string": None, + "name": "Foo", + "node_id": 1, + }, + { + "ast_type": "EventDef", + "body": [{"ast_type": "Pass", "node_id": 8}], + "doc_string": None, + "name": "Bar", + "node_id": 7, + }, + { + "ast_type": "StructDef", + "body": [ + { + "annotation": {"ast_type": "Name", "id": "decimal", "node_id": 13}, + "ast_type": "AnnAssign", + "node_id": 10, + "target": {"ast_type": "Name", "id": "x", "node_id": 11}, + "value": None, + }, + { + "annotation": { + "ast_type": "Subscript", + "node_id": 18, + "slice": {"ast_type": "Int", "node_id": 21, "value": 20}, + "value": {"ast_type": "Name", "id": "Bytes", "node_id": 19}, + }, + "ast_type": "AnnAssign", + "node_id": 15, + "target": {"ast_type": "Name", "id": "y", "node_id": 16}, + "value": None, + }, + { + "annotation": { + "ast_type": "Subscript", + "node_id": 26, + "slice": {"ast_type": "Int", "node_id": 29, "value": 32}, + "value": {"ast_type": "Name", "id": "String", "node_id": 27}, + }, + "ast_type": "AnnAssign", + "node_id": 23, + "target": {"ast_type": "Name", "id": "z", "node_id": 24}, + "value": None, + }, + { + "annotation": {"ast_type": "Name", "id": "uint256", "node_id": 34}, + "ast_type": "AnnAssign", + "node_id": 31, + "target": {"ast_type": "Name", "id": "w", "node_id": 32}, + "value": None, + }, + { + "annotation": {"ast_type": "Name", "id": "address", "node_id": 39}, + "ast_type": "AnnAssign", + "node_id": 36, + "target": {"ast_type": "Name", "id": "u", "node_id": 37}, + "value": None, + }, + ], + "doc_string": None, + "name": "Baz", + "node_id": 9, + }, + { + "ast_type": "InterfaceDef", + "body": [ + { + "args": { + "args": [], + "ast_type": "arguments", + "default": None, + "defaults": [], + "node_id": 43, + }, + "ast_type": "FunctionDef", + "body": [ + { + "ast_type": "Expr", + "node_id": 44, + "value": {"ast_type": "Name", "id": "nonpayable", "node_id": 45}, + } + ], + "decorator_list": [], + "doc_string": None, + "name": "return_tuple", + "node_id": 42, + "pos": None, + "returns": { + "ast_type": "Tuple", + "elements": [ + { + "ast_type": "Subscript", + "node_id": 48, + "slice": {"ast_type": "Int", "node_id": 51, "value": 1}, + "value": {"ast_type": "Name", "id": "Foo", "node_id": 49}, + }, + {"ast_type": "Name", "id": "uint256", "node_id": 53}, + ], + "node_id": 47, + }, + } + ], + "doc_string": None, + "name": "Qux", + "node_id": 41, + }, + { + "annotation": {"ast_type": "Name", "id": "Foo", "node_id": 59}, + "ast_type": "VariableDecl", + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, + "node_id": 56, + "target": { + "ast_type": "Name", + "id": "foo_var", + "node_id": 57, + "type": {"name": "Foo", "typeclass": "struct"}, + }, + "type": {"name": "Foo", "typeclass": "struct"}, + "value": None, + }, + { + "annotation": { + "ast_type": "Subscript", + "node_id": 64, + "slice": {"ast_type": "Int", "node_id": 67, "value": 1}, + "value": {"ast_type": "Name", "id": "Foo", "node_id": 65}, + }, + "ast_type": "VariableDecl", + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, + "node_id": 61, + "target": { + "ast_type": "Name", + "id": "sarray_var", + "node_id": 62, + "type": { + "length": 1, + "name": "$SArray", + "typeclass": "static_array", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + }, + "type": { + "length": 1, + "name": "$SArray", + "typeclass": "static_array", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + "value": None, + }, + { + "annotation": { + "ast_type": "Subscript", + "node_id": 72, + "slice": { + "ast_type": "Tuple", + "elements": [ + {"ast_type": "Name", "id": "Foo", "node_id": 76}, + {"ast_type": "Int", "node_id": 78, "value": 5}, + ], + "node_id": 75, + }, + "value": {"ast_type": "Name", "id": "DynArray", "node_id": 73}, + }, + "ast_type": "VariableDecl", + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, + "node_id": 69, + "target": { + "ast_type": "Name", + "id": "darray_var", + "node_id": 70, + "type": { + "length": 5, + "name": "DynArray", + "typeclass": "dynamic_array", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + }, + "type": { + "length": 5, + "name": "DynArray", + "typeclass": "dynamic_array", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + "value": None, + }, + { + "annotation": {"ast_type": "Name", "id": "Qux", "node_id": 84}, + "ast_type": "VariableDecl", + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, + "node_id": 81, + "target": { + "ast_type": "Name", + "id": "interface_var", + "node_id": 82, + "type": { + "name": "Qux", + "type_decl_node": {"node_id": 41, "source_id": 0}, + "typeclass": "interface", + }, + }, + "type": { + "name": "Qux", + "type_decl_node": {"node_id": 41, "source_id": 0}, + "typeclass": "interface", + }, + "value": None, + }, + { + "annotation": { + "ast_type": "Subscript", + "node_id": 89, + "slice": { + "ast_type": "Tuple", + "elements": [ + {"ast_type": "Name", "id": "address", "node_id": 93}, + {"ast_type": "Name", "id": "Foo", "node_id": 95}, + ], + "node_id": 92, + }, + "value": {"ast_type": "Name", "id": "HashMap", "node_id": 90}, + }, + "ast_type": "VariableDecl", + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, + "node_id": 86, + "target": { + "ast_type": "Name", + "id": "hashmap_var", + "node_id": 87, + "type": { + "key_type": {"name": "address"}, + "name": "HashMap", + "typeclass": "hashmap", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + }, + "type": { + "key_type": {"name": "address"}, + "name": "HashMap", + "typeclass": "hashmap", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + "value": None, + }, + { + "annotation": { + "ast_type": "Subscript", + "node_id": 102, + "slice": {"ast_type": "Int", "node_id": 105, "value": 2}, + "value": {"ast_type": "Name", "id": "uint256", "node_id": 103}, + }, + "ast_type": "VariableDecl", + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, + "node_id": 99, + "target": { + "ast_type": "Name", + "id": "sarray_var2", + "node_id": 100, + "type": { + "length": 2, + "name": "$SArray", + "typeclass": "static_array", + "value_type": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + }, + }, + }, + "type": { + "length": 2, + "name": "$SArray", + "typeclass": "static_array", + "value_type": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + }, + }, + "value": None, + }, + { + "annotation": { + "ast_type": "Subscript", + "node_id": 110, + "slice": { + "ast_type": "Tuple", + "elements": [ + {"ast_type": "Name", "id": "uint256", "node_id": 114}, + {"ast_type": "Int", "node_id": 116, "value": 5}, + ], + "node_id": 113, + }, + "value": {"ast_type": "Name", "id": "DynArray", "node_id": 111}, + }, + "ast_type": "VariableDecl", + "is_constant": False, + "is_immutable": False, + "is_public": False, + "is_transient": False, + "node_id": 107, + "target": { + "ast_type": "Name", + "id": "darray_var2", + "node_id": 108, + "type": { + "length": 5, + "name": "DynArray", + "typeclass": "dynamic_array", + "value_type": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + }, + }, + }, + "type": { + "length": 5, + "name": "DynArray", + "typeclass": "dynamic_array", + "value_type": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + }, + }, + "value": None, + }, + { + "args": { + "args": [], + "ast_type": "arguments", + "default": None, + "defaults": [], + "node_id": 120, + }, + "ast_type": "FunctionDef", + "body": [ + { + "annotation": {"ast_type": "Name", "id": "uint256", "node_id": 124}, + "ast_type": "AnnAssign", + "node_id": 121, + "target": { + "ast_type": "Name", + "id": "t", + "node_id": 122, + "type": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + }, + }, + "value": { + "args": [ + { + "ast_type": "Name", + "id": "uint256", + "node_id": 129, + "type": { + "type_t": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + } + }, + } + ], + "ast_type": "Call", + "func": { + "ast_type": "Name", + "id": "max_value", + "node_id": 127, + "type": {"name": "max_value", "typeclass": "builtin_function"}, + }, + "keywords": [], + "node_id": 126, + "type": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + }, + }, + }, + { + "annotation": {"ast_type": "Name", "id": "int24", "node_id": 134}, + "ast_type": "AnnAssign", + "node_id": 131, + "target": { + "ast_type": "Name", + "id": "u", + "node_id": 132, + "type": { + "bits": 24, + "is_signed": True, + "name": "int24", + "typeclass": "integer", + }, + }, + "value": { + "args": [ + { + "ast_type": "Name", + "id": "int24", + "node_id": 139, + "type": { + "type_t": { + "bits": 24, + "is_signed": True, + "name": "int24", + "typeclass": "integer", + } + }, + } + ], + "ast_type": "Call", + "func": { + "ast_type": "Name", + "id": "empty", + "node_id": 137, + "type": {"name": "empty", "typeclass": "builtin_function"}, + }, + "keywords": [], + "node_id": 136, + "type": { + "bits": 24, + "is_signed": True, + "name": "int24", + "typeclass": "integer", + }, + }, + }, + { + "ast_type": "Assign", + "node_id": 141, + "target": { + "ast_type": "Attribute", + "attr": "foo_var", + "node_id": 142, + "type": {"name": "Foo", "typeclass": "struct"}, + "value": { + "ast_type": "Name", + "id": "self", + "node_id": 143, + "type": {"name": "self"}, + }, + }, + "value": { + "args": [ + { + "ast_type": "Name", + "id": "Foo", + "node_id": 149, + "type": {"type_t": {"name": "Foo", "typeclass": "struct"}}, + } + ], + "ast_type": "Call", + "func": { + "ast_type": "Name", + "id": "empty", + "node_id": 147, + "type": {"name": "empty", "typeclass": "builtin_function"}, + }, + "keywords": [], + "node_id": 146, + "type": {"name": "Foo", "typeclass": "struct"}, + }, + }, + { + "ast_type": "Assign", + "node_id": 151, + "target": { + "ast_type": "Subscript", + "node_id": 152, + "slice": { + "ast_type": "Int", + "node_id": 157, + "type": { + "bits": 8, + "is_signed": True, + "name": "int8", + "typeclass": "integer", + }, + "value": 0, + }, + "type": {"name": "Foo", "typeclass": "struct"}, + "value": { + "ast_type": "Attribute", + "attr": "sarray_var", + "node_id": 153, + "type": { + "length": 1, + "name": "$SArray", + "typeclass": "static_array", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + "value": { + "ast_type": "Name", + "id": "self", + "node_id": 154, + "type": {"name": "self"}, + }, + }, + }, + "value": { + "args": [ + { + "ast_type": "Name", + "id": "Foo", + "node_id": 162, + "type": {"type_t": {"name": "Foo", "typeclass": "struct"}}, + } + ], + "ast_type": "Call", + "func": { + "ast_type": "Name", + "id": "empty", + "node_id": 160, + "type": {"name": "empty", "typeclass": "builtin_function"}, + }, + "keywords": [], + "node_id": 159, + "type": {"name": "Foo", "typeclass": "struct"}, + }, + }, + { + "ast_type": "Assign", + "node_id": 164, + "target": { + "ast_type": "Subscript", + "node_id": 165, + "slice": { + "ast_type": "Int", + "node_id": 170, + "type": { + "bits": 8, + "is_signed": True, + "name": "int8", + "typeclass": "integer", + }, + "value": 1, + }, + "type": {"name": "Foo", "typeclass": "struct"}, + "value": { + "ast_type": "Attribute", + "attr": "darray_var", + "node_id": 166, + "type": { + "length": 5, + "name": "DynArray", + "typeclass": "dynamic_array", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + "value": { + "ast_type": "Name", + "id": "self", + "node_id": 167, + "type": {"name": "self"}, + }, + }, + }, + "value": { + "args": [ + { + "ast_type": "Name", + "id": "Foo", + "node_id": 175, + "type": {"type_t": {"name": "Foo", "typeclass": "struct"}}, + } + ], + "ast_type": "Call", + "func": { + "ast_type": "Name", + "id": "empty", + "node_id": 173, + "type": {"name": "empty", "typeclass": "builtin_function"}, + }, + "keywords": [], + "node_id": 172, + "type": {"name": "Foo", "typeclass": "struct"}, + }, + }, + { + "ast_type": "Assign", + "node_id": 177, + "target": { + "ast_type": "Tuple", + "elements": [ + { + "ast_type": "Attribute", + "attr": "sarray_var", + "node_id": 179, + "type": { + "length": 1, + "name": "$SArray", + "typeclass": "static_array", + "value_type": {"name": "Foo", "typeclass": "struct"}, + }, + "value": { + "ast_type": "Name", + "id": "self", + "node_id": 180, + "type": {"name": "self"}, + }, + }, + { + "ast_type": "Name", + "id": "t", + "node_id": 183, + "type": { + "bits": 256, + "is_signed": False, + "name": "uint256", + "typeclass": "integer", + }, + }, + ], + "node_id": 178, + "type": {"members": {}, "name": "$Tuple", "typeclass": "tuple"}, + }, + "value": { + "ast_type": "ExtCall", + "node_id": 186, + "type": {"members": {}, "name": "$Tuple", "typeclass": "tuple"}, + "value": { + "args": [], + "ast_type": "Call", + "func": { + "ast_type": "Attribute", + "attr": "return_tuple", + "node_id": 188, + "type": { + "name": "return_tuple", + "type_decl_node": {"node_id": 42, "source_id": 0}, + "typeclass": "contract_function", + }, + "value": { + "ast_type": "Attribute", + "attr": "interface_var", + "node_id": 189, + "type": { + "name": "Qux", + "type_decl_node": {"node_id": 41, "source_id": 0}, + "typeclass": "interface", + }, + "value": { + "ast_type": "Name", + "id": "self", + "node_id": 190, + "type": {"name": "self"}, + }, + }, + }, + "keywords": [], + "node_id": 187, + "type": {"members": {}, "name": "$Tuple", "typeclass": "tuple"}, + }, + }, + }, + ], + "decorator_list": [{"ast_type": "Name", "id": "internal", "node_id": 194}], + "doc_string": None, + "name": "foo", + "node_id": 119, + "pos": None, + "returns": None, + }, + { + "args": { + "args": [], + "ast_type": "arguments", + "default": None, + "defaults": [], + "node_id": 197, + }, + "ast_type": "FunctionDef", + "body": [ + { + "annotation": {"ast_type": "Name", "id": "bytes24", "node_id": 201}, + "ast_type": "AnnAssign", + "node_id": 198, + "target": { + "ast_type": "Name", + "id": "s", + "node_id": 199, + "type": {"m": 24, "name": "bytes24", "typeclass": "bytes_m"}, + }, + "value": { + "args": [ + { + "ast_type": "Name", + "id": "bytes24", + "node_id": 206, + "type": { + "type_t": { + "m": 24, + "name": "bytes24", + "typeclass": "bytes_m", + } + }, + } + ], + "ast_type": "Call", + "func": { + "ast_type": "Name", + "id": "empty", + "node_id": 204, + "type": {"name": "empty", "typeclass": "builtin_function"}, + }, + "keywords": [], + "node_id": 203, + "type": {"m": 24, "name": "bytes24", "typeclass": "bytes_m"}, + }, + } + ], + "decorator_list": [{"ast_type": "Name", "id": "external", "node_id": 208}], + "doc_string": None, + "name": "bar", + "node_id": 196, + "pos": None, + "returns": None, + }, + ], + "doc_string": None, + "name": None, + "node_id": 0, + "path": "lib1.vy", + "source_id": 0, + "type": { + "name": "lib1.vy", + "type_decl_node": {"node_id": 0, "source_id": 0}, + "typeclass": "module", + }, + } def test_output_variable_read_write_analysis(make_input_bundle, chdir_tmp_path): # test we output the result of variable read/write correctly + # note: also tests serialization of structs, strings, static arrays, + # and type_decl_nodes across modules. lib1 = """ struct Foo: a: uint256 @@ -170,7 +1196,7 @@ def test_output_variable_read_write_analysis(make_input_bundle, chdir_tmp_path): bars: DynArray[Bar, 10] """ - code = """ + main = """ import lib1 initializes: lib1 @@ -206,30 +1232,41 @@ def qux(): def qux2(): self.qux() """ - input_bundle = make_input_bundle({"lib1.vy": lib1}) - - out = compiler.compile_code( - code, - contract_path="main.vy", - input_bundle=input_bundle, - output_formats=["annotated_ast_dict"], - source_id=0, - )["annotated_ast_dict"]["ast"] - _strip_source_annotations(out) - - foo, bar, baz, qux, qux2 = out["body"][3:] + input_bundle = make_input_bundle({"lib1.vy": lib1, "main.vy": main}) + + # preliminaries: main.vy has source_id==0, lib1.vy has source_id==1. + file = input_bundle.load_file("main.vy") + assert file.source_id == 0 + assert input_bundle.load_file("lib1.vy").source_id == 1 + + out = compiler.compile_from_file_input( + file, input_bundle=input_bundle, output_formats=["annotated_ast_dict"] + ) + ast = out["annotated_ast_dict"]["ast"] + + assert ast["path"] == "main.vy" + assert ast["source_id"] == 0 + + _strip_source_annotations(ast, to_strip=NODE_SRC_ATTRIBUTES + ("node_id", "type")) + + foo, bar, baz, qux, qux2 = ast["body"][3:] assert foo["name"] == "foo" assert foo["body"] == [ { "annotation": {"ast_type": "Name", "id": "uint256"}, "ast_type": "AnnAssign", - "target": {"ast_type": "Name", "id": "x", "type": "uint256"}, + "target": {"ast_type": "Name", "id": "x"}, "value": { "ast_type": "Attribute", "attr": "counter", - "type": "uint256", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, - "variable_reads": [{"access_path": [], "module": "lib1.vy", "variable": "counter"}], + "value": {"ast_type": "Name", "id": "lib1"}, + "variable_reads": [ + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + } + ], }, }, { @@ -238,14 +1275,23 @@ def qux2(): "target": { "ast_type": "Attribute", "attr": "counter", - "type": "uint256", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, - "variable_reads": [{"access_path": [], "module": "lib1.vy", "variable": "counter"}], + "value": {"ast_type": "Name", "id": "lib1"}, + "variable_reads": [ + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + } + ], "variable_writes": [ - {"access_path": [], "module": "lib1.vy", "variable": "counter"} + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + } ], }, - "value": {"ast_type": "Int", "type": "uint256", "value": 1}, + "value": {"ast_type": "Int", "value": 1}, }, ] @@ -254,25 +1300,35 @@ def qux2(): { "annotation": {"ast_type": "Name", "id": "uint256"}, "ast_type": "AnnAssign", - "target": {"ast_type": "Name", "id": "x", "type": "uint256"}, + "target": {"ast_type": "Name", "id": "x"}, "value": { "ast_type": "Attribute", "attr": "counter", - "type": "uint256", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, - "variable_reads": [{"access_path": [], "module": "lib1.vy", "variable": "counter"}], + "value": {"ast_type": "Name", "id": "lib1"}, + "variable_reads": [ + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + } + ], }, }, { "annotation": {"ast_type": "Name", "id": "uint256"}, "ast_type": "AnnAssign", - "target": {"ast_type": "Name", "id": "y", "type": "uint256"}, + "target": {"ast_type": "Name", "id": "y"}, "value": { "ast_type": "Attribute", "attr": "counter", - "type": "uint256", - "value": {"ast_type": "Name", "id": "self", "type": "self"}, - "variable_reads": [{"access_path": [], "module": "main.vy", "variable": "counter"}], + "value": {"ast_type": "Name", "id": "self"}, + "variable_reads": [ + { + "access_path": [], + "decl_node": {"node_id": 8, "source_id": 0}, + "name": "counter", + } + ], }, }, { @@ -281,14 +1337,23 @@ def qux2(): "target": { "ast_type": "Attribute", "attr": "counter", - "type": "uint256", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, - "variable_reads": [{"access_path": [], "module": "lib1.vy", "variable": "counter"}], + "value": {"ast_type": "Name", "id": "lib1"}, + "variable_reads": [ + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + } + ], "variable_writes": [ - {"access_path": [], "module": "lib1.vy", "variable": "counter"} + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + } ], }, - "value": {"ast_type": "Int", "type": "uint256", "value": 1}, + "value": {"ast_type": "Int", "value": 1}, }, ] @@ -302,18 +1367,28 @@ def qux2(): "func": { "ast_type": "Attribute", "attr": "bar", - "type": "def bar():", - "value": {"ast_type": "Name", "id": "self", "type": "self"}, + "value": {"ast_type": "Name", "id": "self"}, "variable_reads": [ - {"access_path": [], "module": "lib1.vy", "variable": "counter"}, - {"access_path": [], "module": "main.vy", "variable": "counter"}, + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + }, + { + "access_path": [], + "decl_node": {"node_id": 8, "source_id": 0}, + "name": "counter", + }, ], "variable_writes": [ - {"access_path": [], "module": "lib1.vy", "variable": "counter"} + { + "access_path": [], + "decl_node": {"node_id": 29, "source_id": 1}, + "name": "counter", + } ], }, "keywords": [], - "type": "(void)", }, }, { @@ -322,14 +1397,23 @@ def qux2(): "target": { "ast_type": "Attribute", "attr": "counter", - "type": "uint256", - "value": {"ast_type": "Name", "id": "self", "type": "self"}, - "variable_reads": [{"access_path": [], "module": "main.vy", "variable": "counter"}], + "value": {"ast_type": "Name", "id": "self"}, + "variable_reads": [ + { + "access_path": [], + "decl_node": {"node_id": 8, "source_id": 0}, + "name": "counter", + } + ], "variable_writes": [ - {"access_path": [], "module": "main.vy", "variable": "counter"} + { + "access_path": [], + "decl_node": {"node_id": 8, "source_id": 0}, + "name": "counter", + } ], }, - "value": {"ast_type": "Int", "type": "uint256", "value": 1}, + "value": {"ast_type": "Int", "value": 1}, }, ] @@ -340,37 +1424,54 @@ def qux2(): "target": { "ast_type": "Attribute", "attr": "bars", - "type": "DynArray[Bar declaration object, 10]", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, - "variable_reads": [{"access_path": [], "module": "lib1.vy", "variable": "bars"}], - "variable_writes": [{"access_path": [], "module": "lib1.vy", "variable": "bars"}], - }, - "value": { - "ast_type": "List", - "elements": [], - "type": "DynArray[Bar declaration object, 10]", + "value": {"ast_type": "Name", "id": "lib1"}, + "variable_reads": [ + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } + ], + "variable_writes": [ + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } + ], }, + "value": {"ast_type": "List", "elements": []}, }, { "ast_type": "Assign", "target": { "ast_type": "Subscript", - "slice": {"ast_type": "Int", "type": "int8", "value": 0}, - "type": "Bar declaration object", + "slice": {"ast_type": "Int", "value": 0}, "value": { "ast_type": "Attribute", "attr": "bars", - "type": "DynArray[Bar declaration object, 10]", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, + "value": {"ast_type": "Name", "id": "lib1"}, "variable_reads": [ - {"access_path": [], "module": "lib1.vy", "variable": "bars"} + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } ], }, "variable_reads": [ - {"access_path": ["$subscript_access"], "module": "lib1.vy", "variable": "bars"} + { + "access_path": ["$subscript_access"], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } ], "variable_writes": [ - {"access_path": ["$subscript_access"], "module": "lib1.vy", "variable": "bars"} + { + "access_path": ["$subscript_access"], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } ], }, "value": { @@ -378,14 +1479,12 @@ def qux2(): { "ast_type": "Attribute", "attr": "Bar", - "type": "type(Bar declaration object)", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, + "value": {"ast_type": "Name", "id": "lib1"}, } ], "ast_type": "Call", - "func": {"ast_type": "Name", "id": "empty", "type": "(builtin) empty"}, + "func": {"ast_type": "Name", "id": "empty"}, "keywords": [], - "type": "Bar declaration object", }, }, { @@ -393,40 +1492,41 @@ def qux2(): "target": { "ast_type": "Attribute", "attr": "items", - "type": "Foo declaration object[2]", "value": { "ast_type": "Subscript", - "slice": {"ast_type": "Int", "type": "int8", "value": 1}, - "type": "Bar declaration object", + "slice": {"ast_type": "Int", "value": 1}, "value": { "ast_type": "Attribute", "attr": "bars", - "type": "DynArray[Bar declaration object, 10]", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, + "value": {"ast_type": "Name", "id": "lib1"}, "variable_reads": [ - {"access_path": [], "module": "lib1.vy", "variable": "bars"} + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } ], }, "variable_reads": [ { "access_path": ["$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, "variable_reads": [ { "access_path": ["$subscript_access", "items"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], "variable_writes": [ { "access_path": ["$subscript_access", "items"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, @@ -435,7 +1535,6 @@ def qux2(): { "ast_type": "Subscript", "slice": {"ast_type": "Int", "value": 2}, - "type": "type(Foo declaration object[2])", "value": { "ast_type": "Attribute", "attr": "Foo", @@ -444,9 +1543,8 @@ def qux2(): } ], "ast_type": "Call", - "func": {"ast_type": "Name", "id": "empty", "type": "(builtin) empty"}, + "func": {"ast_type": "Name", "id": "empty"}, "keywords": [], - "type": "Foo declaration object[2]", }, }, { @@ -454,136 +1552,134 @@ def qux2(): "target": { "ast_type": "Attribute", "attr": "a", - "type": "uint256", "value": { "ast_type": "Subscript", - "slice": {"ast_type": "Int", "type": "int8", "value": 0}, - "type": "Foo declaration object", + "slice": {"ast_type": "Int", "value": 0}, "value": { "ast_type": "Attribute", "attr": "items", - "type": "Foo declaration object[2]", "value": { "ast_type": "Subscript", - "slice": {"ast_type": "Int", "type": "int8", "value": 1}, - "type": "Bar declaration object", + "slice": {"ast_type": "Int", "value": 1}, "value": { "ast_type": "Attribute", "attr": "bars", - "type": "DynArray[Bar " "declaration " "object, 10]", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, + "value": {"ast_type": "Name", "id": "lib1"}, "variable_reads": [ - {"access_path": [], "module": "lib1.vy", "variable": "bars"} + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } ], }, "variable_reads": [ { "access_path": ["$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, "variable_reads": [ { "access_path": ["$subscript_access", "items"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, "variable_reads": [ { "access_path": ["$subscript_access", "items", "$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, "variable_reads": [ { "access_path": ["$subscript_access", "items", "$subscript_access", "a"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], "variable_writes": [ { "access_path": ["$subscript_access", "items", "$subscript_access", "a"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, - "value": {"ast_type": "Int", "type": "uint256", "value": 1}, + "value": {"ast_type": "Int", "value": 1}, }, { "ast_type": "Assign", "target": { "ast_type": "Attribute", "attr": "c", - "type": "decimal", "value": { "ast_type": "Subscript", - "slice": {"ast_type": "Int", "type": "int8", "value": 1}, - "type": "Foo declaration object", + "slice": {"ast_type": "Int", "value": 1}, "value": { "ast_type": "Attribute", "attr": "items", - "type": "Foo declaration object[2]", "value": { "ast_type": "Subscript", - "slice": {"ast_type": "Int", "type": "int8", "value": 0}, - "type": "Bar declaration object", + "slice": {"ast_type": "Int", "value": 0}, "value": { "ast_type": "Attribute", "attr": "bars", - "type": "DynArray[Bar " "declaration " "object, 10]", - "value": {"ast_type": "Name", "id": "lib1", "type": "lib1.vy"}, + "value": {"ast_type": "Name", "id": "lib1"}, "variable_reads": [ - {"access_path": [], "module": "lib1.vy", "variable": "bars"} + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + } ], }, "variable_reads": [ { "access_path": ["$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, "variable_reads": [ { "access_path": ["$subscript_access", "items"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, "variable_reads": [ { "access_path": ["$subscript_access", "items", "$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, "variable_reads": [ { "access_path": ["$subscript_access", "items", "$subscript_access", "c"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], "variable_writes": [ { "access_path": ["$subscript_access", "items", "$subscript_access", "c"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", } ], }, - "value": {"ast_type": "Decimal", "type": "decimal", "value": "10.0"}, + "value": {"ast_type": "Decimal", "value": "10.0"}, }, ] @@ -597,62 +1693,68 @@ def qux2(): "func": { "ast_type": "Attribute", "attr": "qux", - "type": "def qux():", - "value": {"ast_type": "Name", "id": "self", "type": "self"}, + "value": {"ast_type": "Name", "id": "self"}, "variable_reads": [ - {"access_path": [], "module": "lib1.vy", "variable": "bars"}, + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + }, { "access_path": ["$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, { "access_path": ["$subscript_access", "items"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, { "access_path": ["$subscript_access", "items", "$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, { "access_path": ["$subscript_access", "items", "$subscript_access", "a"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, { "access_path": ["$subscript_access", "items", "$subscript_access", "c"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, ], "variable_writes": [ - {"access_path": [], "module": "lib1.vy", "variable": "bars"}, + { + "access_path": [], + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", + }, { "access_path": ["$subscript_access"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, { "access_path": ["$subscript_access", "items"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, { "access_path": ["$subscript_access", "items", "$subscript_access", "a"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, { "access_path": ["$subscript_access", "items", "$subscript_access", "c"], - "module": "lib1.vy", - "variable": "bars", + "decl_node": {"node_id": 34, "source_id": 1}, + "name": "bars", }, ], }, "keywords": [], - "type": "(void)", }, } ] diff --git a/vyper/ast/__init__.py b/vyper/ast/__init__.py index 39530d0c3e..67734ea7ab 100644 --- a/vyper/ast/__init__.py +++ b/vyper/ast/__init__.py @@ -5,7 +5,7 @@ from . import nodes, validation from .natspec import parse_natspec -from .nodes import compare_nodes, as_tuple +from .nodes import as_tuple from .utils import ast_to_dict from .parse import parse_to_ast, parse_to_ast_with_settings diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index bd95b68e09..2ca199bd7e 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -25,7 +25,7 @@ VyperException, ZeroDivisionException, ) -from vyper.utils import MAX_DECIMAL_PLACES, SizeLimits, annotate_source_code, evm_div +from vyper.utils import MAX_DECIMAL_PLACES, SizeLimits, annotate_source_code, evm_div, sha256sum NODE_BASE_ATTRIBUTES = ( "_children", @@ -139,49 +139,6 @@ def get_node( return node -def compare_nodes(left_node: "VyperNode", right_node: "VyperNode") -> bool: - """ - Compare the represented value(s) of two vyper nodes. - - This method evaluates a sort of "loose equality". It recursively compares the - values of each field within two different nodes but does not compare the - node_id or any members related to source offsets. - - Arguments - --------- - left_node : VyperNode - First node object to compare. - right_node : VyperNode - Second node object to compare. - - Returns - ------- - bool - True if the given nodes represent the same value(s), False otherwise. - """ - if not isinstance(left_node, type(right_node)): - return False - - for field_name in (i for i in left_node.get_fields() if i not in VyperNode.__slots__): - left_value = getattr(left_node, field_name, None) - right_value = getattr(right_node, field_name, None) - - # compare types instead of isinstance() in case one node class inherits the other - if type(left_value) is not type(right_value): - return False - - if isinstance(left_value, list): - if next((i for i in zip(left_value, right_value) if not compare_nodes(*i)), None): - return False - elif isinstance(left_value, VyperNode): - if not compare_nodes(left_value, right_value): - return False - elif left_value != right_value: - return False - - return True - - def _to_node(obj, parent): # if object is a Python node or dict representing a node, convert to a Vyper node if isinstance(obj, (dict, python_ast.AST)): @@ -375,6 +332,8 @@ def __deepcopy__(self, memo): return pickle.loads(pickle.dumps(self)) def __eq__(self, other): + # CMC 2024-03-03 I'm not sure it makes much sense to compare AST + # nodes, especially if they come from other modules if not isinstance(other, type(self)): return False if getattr(other, "node_id", None) != getattr(self, "node_id", None): @@ -413,8 +372,16 @@ def description(self): @property def module_node(self): + if isinstance(self, Module): + return self return self.get_ancestor(Module) + def get_id_dict(self): + source_id = None + if self.module_node is not None: + source_id = self.module_node.source_id + return {"node_id": self.node_id, "source_id": source_id} + @property def is_literal_value(self): """ @@ -487,8 +454,9 @@ def to_dict(self) -> dict: else: ast_dict[key] = _to_dict(value) + # TODO: add full analysis result, e.g. expr_info if "type" in self._metadata: - ast_dict["type"] = str(self._metadata["type"]) + ast_dict["type"] = self._metadata["type"].to_dict() return ast_dict @@ -659,6 +627,13 @@ class Module(TopLevel): # metadata __slots__ = ("path", "resolved_path", "source_id") + def to_dict(self): + return dict(source_sha256sum=self.source_sha256sum, **super().to_dict()) + + @property + def source_sha256sum(self): + return sha256sum(self.full_source_code) + @contextlib.contextmanager def namespace(self): from vyper.semantics.namespace import get_namespace, override_global_namespace @@ -1454,6 +1429,13 @@ class Pass(Stmt): class _ImportStmt(Stmt): __slots__ = ("name", "alias") + def to_dict(self): + ret = super().to_dict() + if (import_info := self._metadata.get("import_info")) is not None: + ret["import_info"] = import_info.to_dict() + + return ret + def __init__(self, *args, **kwargs): if len(kwargs["names"]) > 1: _raise_syntax_exc("Assignment statement must have one target", kwargs) diff --git a/vyper/ast/nodes.pyi b/vyper/ast/nodes.pyi index a49ac43bdf..4ebb61e76e 100644 --- a/vyper/ast/nodes.pyi +++ b/vyper/ast/nodes.pyi @@ -13,7 +13,6 @@ DICT_AST_SKIPLIST: Any def get_node( ast_struct: Union[dict, python_ast.AST], parent: Optional[VyperNode] = ... ) -> VyperNode: ... -def compare_nodes(left_node: VyperNode, right_node: VyperNode) -> bool: ... class VyperNode: full_source_code: str = ... diff --git a/vyper/ast/parse.py b/vyper/ast/parse.py index d14d0a33be..787b1404e6 100644 --- a/vyper/ast/parse.py +++ b/vyper/ast/parse.py @@ -10,7 +10,7 @@ from vyper.compiler.settings import Settings from vyper.exceptions import CompilerPanic, ParserException, SyntaxException from vyper.typing import ModificationOffsets -from vyper.utils import vyper_warn +from vyper.utils import sha256sum, vyper_warn def parse_to_ast(*args: Any, **kwargs: Any) -> vy_ast.Module: @@ -244,8 +244,11 @@ def _visit_docstring(self, node): return node def visit_Module(self, node): + # TODO: is this the best place for these? maybe they can be on + # CompilerData instead. node.path = self._module_path node.resolved_path = self._resolved_path + node.source_sha256sum = sha256sum(self._source_code) node.source_id = self._source_id return self._visit_docstring(node) diff --git a/vyper/builtins/_signatures.py b/vyper/builtins/_signatures.py index ab5854d68f..d012e4a1cf 100644 --- a/vyper/builtins/_signatures.py +++ b/vyper/builtins/_signatures.py @@ -80,6 +80,8 @@ def decorator_fn(self, node, context): class BuiltinFunctionT(VyperType): + typeclass = "builtin_function" + _has_varargs = False _inputs: list[tuple[str, Any]] = [] _kwargs: dict[str, KwargSettings] = {} diff --git a/vyper/compiler/input_bundle.py b/vyper/compiler/input_bundle.py index d4132cad50..4fe16a4bf1 100644 --- a/vyper/compiler/input_bundle.py +++ b/vyper/compiler/input_bundle.py @@ -2,10 +2,12 @@ import json import os from dataclasses import dataclass +from functools import cached_property from pathlib import Path, PurePath from typing import Any, Iterator, Optional from vyper.exceptions import JSONError +from vyper.utils import sha256sum # a type to make mypy happy PathLike = Path | PurePath @@ -26,6 +28,10 @@ class CompilerInput: class FileInput(CompilerInput): source_code: str + @cached_property + def sha256sum(self): + return sha256sum(self.source_code) + @dataclass class ABIInput(CompilerInput): diff --git a/vyper/semantics/analysis/base.py b/vyper/semantics/analysis/base.py index 762345a726..e424f94e19 100644 --- a/vyper/semantics/analysis/base.py +++ b/vyper/semantics/analysis/base.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Union from vyper import ast as vy_ast -from vyper.compiler.input_bundle import InputBundle +from vyper.compiler.input_bundle import CompilerInput, FileInput from vyper.exceptions import CompilerPanic, StructureException from vyper.semantics.data_locations import DataLocation from vyper.semantics.types.base import VyperType @@ -123,10 +123,21 @@ class ImportInfo(AnalysisResult): typ: Union[ModuleInfo, "InterfaceT"] alias: str # the name in the namespace qualified_module_name: str # for error messages - # source_id: int - input_bundle: InputBundle + compiler_input: CompilerInput # to recover file info for ast export node: vy_ast.VyperNode + def to_dict(self): + ret = {"alias": self.alias, "qualified_module_name": self.qualified_module_name} + + ret["source_id"] = self.compiler_input.source_id + ret["path"] = str(self.compiler_input.path) + ret["resolved_path"] = str(self.compiler_input.resolved_path) + + if isinstance(self.compiler_input, FileInput): + ret["file_sha256sum"] = self.compiler_input.sha256sum + + return ret + # analysis result of InitializesDecl @dataclass @@ -242,9 +253,8 @@ def to_dict(self): path = ["$subscript_access" if s is self.SUBSCRIPT_ACCESS else s for s in self.path] varname = var.decl_node.target.id - module_node = var.decl_node.get_ancestor(vy_ast.Module) - module_path = module_node.path - ret = {"variable": varname, "module": module_path, "access_path": path} + decl_node = var.decl_node.get_id_dict() + ret = {"name": varname, "decl_node": decl_node, "access_path": path} return ret diff --git a/vyper/semantics/analysis/module.py b/vyper/semantics/analysis/module.py index b8b4bf48f2..90493d643b 100644 --- a/vyper/semantics/analysis/module.py +++ b/vyper/semantics/analysis/module.py @@ -4,7 +4,13 @@ import vyper.builtins.interfaces from vyper import ast as vy_ast -from vyper.compiler.input_bundle import ABIInput, FileInput, FilesystemInputBundle, InputBundle +from vyper.compiler.input_bundle import ( + ABIInput, + CompilerInput, + FileInput, + FilesystemInputBundle, + InputBundle, +) from vyper.evm.opcodes import version_check from vyper.exceptions import ( BorrowException, @@ -715,9 +721,9 @@ def visit_StructDef(self, node): def _add_import( self, node: vy_ast.VyperNode, level: int, qualified_module_name: str, alias: str ) -> None: - module_info = self._load_import(node, level, qualified_module_name, alias) + compiler_input, module_info = self._load_import(node, level, qualified_module_name, alias) node._metadata["import_info"] = ImportInfo( - module_info, alias, qualified_module_name, self.input_bundle, node + module_info, alias, qualified_module_name, compiler_input, node ) self.namespace[alias] = module_info @@ -732,7 +738,7 @@ def _load_import(self, node: vy_ast.VyperNode, level: int, module_str: str, alia def _load_import_helper( self, node: vy_ast.VyperNode, level: int, module_str: str, alias: str - ) -> Any: + ) -> tuple[CompilerInput, Any]: if _is_builtin(module_str): return _load_builtin_import(level, module_str) @@ -762,7 +768,7 @@ def _load_import_helper( is_interface=False, ) - return ModuleInfo(module_t, alias) + return file, ModuleInfo(module_t, alias) except FileNotFoundError as e: # escape `e` from the block scope, it can make things @@ -783,7 +789,7 @@ def _load_import_helper( ) module_t = module_ast._metadata["type"] - return module_t.interface + return file, module_t.interface except FileNotFoundError: pass @@ -791,7 +797,7 @@ def _load_import_helper( try: file = self.input_bundle.load_file(path.with_suffix(".json")) assert isinstance(file, ABIInput) # mypy hint - return InterfaceT.from_json_abi(str(file.path), file.abi) + return file, InterfaceT.from_json_abi(str(file.path), file.abi) except FileNotFoundError: pass @@ -844,7 +850,7 @@ def _is_builtin(module_str): return any(module_str.startswith(prefix) for prefix in BUILTIN_PREFIXES) -def _load_builtin_import(level: int, module_str: str) -> InterfaceT: +def _load_builtin_import(level: int, module_str: str) -> tuple[CompilerInput, InterfaceT]: if not _is_builtin(module_str): raise ModuleNotFound(module_str) @@ -885,4 +891,4 @@ def _load_builtin_import(level: int, module_str: str) -> InterfaceT: with override_global_namespace(Namespace()): module_t = _analyze_module_r(interface_ast, input_bundle, ImportGraph(), is_interface=True) - return module_t.interface + return file, module_t.interface diff --git a/vyper/semantics/types/base.py b/vyper/semantics/types/base.py index 94d9c1e371..46edb522ca 100644 --- a/vyper/semantics/types/base.py +++ b/vyper/semantics/types/base.py @@ -32,6 +32,13 @@ def compare_type(self, other): # type is the same return isinstance(other, self.__class__) and other.type_ == self.type_ + def to_dict(self): + # this shouldn't really appear in the AST type annotations, but it's + # there for certain string literals which don't have a known type. this + # should be fixed soon by improving type inference. for now just put + # *something* in the AST. + return {"generic": self.type_.typeclass} + class VyperType: """ @@ -58,7 +65,9 @@ class VyperType: `InterfaceT`s. """ - _id: str + typeclass: str = None # type: ignore + + _id: str # rename to `_name` _type_members: Optional[Dict] = None _valid_literal: Tuple = () _invalid_locations: Tuple = () @@ -74,6 +83,7 @@ class VyperType: _attribute_in_annotation: bool = False size_in_bytes = 32 # default; override for larger types + decl_node: Optional[vy_ast.VyperNode] = None def __init__(self, members: Optional[Dict] = None) -> None: @@ -106,6 +116,31 @@ def __eq__(self, other): def __lt__(self, other): return self.abi_type.selector_name() < other.abi_type.selector_name() + # return a dict suitable for serializing in the AST + def to_dict(self): + ret = {"name": self._id} + if self.decl_node is not None: + ret["type_decl_node"] = self.decl_node.get_id_dict() + if self.typeclass is not None: + ret["typeclass"] = self.typeclass + + # use dict ctor to block duplicates + return dict(**self._addl_dict_fields(), **ret) + + # for most types, this is a reasonable implementation, but it can + # be overridden as needed. + def _addl_dict_fields(self): + keys = self._equality_attrs or () + ret = {} + for k in keys: + if k.startswith("_"): + continue + v = getattr(self, k) + if hasattr(v, "to_dict"): + v = v.to_dict() + ret[k] = v + return ret + @cached_property def _as_darray(self): return self._as_array @@ -369,6 +404,9 @@ def __init__(self, typedef): self.typedef = typedef + def to_dict(self): + return {"type_t": self.typedef.to_dict()} + def __repr__(self): return f"type({self.typedef})" diff --git a/vyper/semantics/types/bytestrings.py b/vyper/semantics/types/bytestrings.py index 96bb1bbf74..cd330681cf 100644 --- a/vyper/semantics/types/bytestrings.py +++ b/vyper/semantics/types/bytestrings.py @@ -42,6 +42,9 @@ def __init__(self, length: int = 0) -> None: def __repr__(self): return f"{self._id}[{self.length}]" + def _addl_dict_fields(self): + return {"length": self.length} + @property def length(self): """ @@ -153,6 +156,8 @@ def from_literal(cls, node: vy_ast.Constant) -> "_BytestringT": class BytesT(_BytestringT): + typeclass = "bytes" + _id = "Bytes" _valid_literal = (vy_ast.Bytes,) @@ -162,6 +167,8 @@ def abi_type(self) -> ABIType: class StringT(_BytestringT): + typeclass = "string" + _id = "String" _valid_literal = (vy_ast.Str,) diff --git a/vyper/semantics/types/function.py b/vyper/semantics/types/function.py index 2cbb972ac7..fbeb3e37cd 100644 --- a/vyper/semantics/types/function.py +++ b/vyper/semantics/types/function.py @@ -82,6 +82,8 @@ class ContractFunctionT(VyperType): Whether this function is marked `@nonreentrant` or not """ + typeclass = "contract_function" + _is_callable = True def __init__( @@ -140,6 +142,10 @@ def __init__( def decl_node(self): return self.ast_def + @property + def _id(self): + return self.name + def mark_analysed(self): assert not self._analysed self._analysed = True @@ -810,6 +816,7 @@ class MemberFunctionT(VyperType): return_type: the return type of this method. ex. None """ + typeclass = "member_function" _is_callable = True # keep LGTM linter happy @@ -836,6 +843,10 @@ def __init__( def modifiability(self): return Modifiability.MODIFIABLE if self.is_modifying else Modifiability.RUNTIME_CONSTANT + @property + def _id(self): + return self.name + def __repr__(self): return f"{self.underlying_type._id} member function '{self.name}'" diff --git a/vyper/semantics/types/module.py b/vyper/semantics/types/module.py index 5faefaf404..a242bfa1fe 100644 --- a/vyper/semantics/types/module.py +++ b/vyper/semantics/types/module.py @@ -29,6 +29,8 @@ class InterfaceT(_UserType): + typeclass = "interface" + _type_members = {"address": AddressT()} _is_prim_word = True _as_array = True @@ -36,7 +38,14 @@ class InterfaceT(_UserType): _supports_external_calls = True _attribute_in_annotation = True - def __init__(self, _id: str, functions: dict, events: dict, structs: dict) -> None: + def __init__( + self, + _id: str, + decl_node: Optional[vy_ast.VyperNode], + functions: dict, + events: dict, + structs: dict, + ) -> None: validate_unique_method_ids(list(functions.values())) members = functions | events | structs @@ -53,6 +62,8 @@ def __init__(self, _id: str, functions: dict, events: dict, structs: dict) -> No self.events = events self.structs = structs + self.decl_node = decl_node + def get_type_member(self, attr, node): # get an event or struct from this interface return TYPE_T(self._helper.get_member(attr, node)) @@ -140,6 +151,7 @@ def to_toplevel_abi_dict(self) -> list[dict]: def _from_lists( cls, interface_name: str, + decl_node: Optional[vy_ast.VyperNode], function_list: list[tuple[str, ContractFunctionT]], event_list: list[tuple[str, EventT]], struct_list: list[tuple[str, StructT]], @@ -169,7 +181,7 @@ def _mark_seen(name, item): _mark_seen(name, struct) structs[name] = struct - return cls(interface_name, functions, events, structs) + return cls(interface_name, decl_node, functions, events, structs) @classmethod def from_json_abi(cls, name: str, abi: dict) -> "InterfaceT": @@ -197,7 +209,7 @@ def from_json_abi(cls, name: str, abi: dict) -> "InterfaceT": events.append((item["name"], EventT.from_abi(item))) structs: list = [] # no structs in json ABI (as of yet) - return cls._from_lists(name, functions, events, structs) + return cls._from_lists(name, None, functions, events, structs) @classmethod def from_ModuleT(cls, module_t: "ModuleT") -> "InterfaceT": @@ -230,7 +242,7 @@ def from_ModuleT(cls, module_t: "ModuleT") -> "InterfaceT": # in the ABI json structs = [(node.name, node._metadata["struct_type"]) for node in module_t.struct_defs] - return cls._from_lists(module_t._id, funcs, events, structs) + return cls._from_lists(module_t._id, module_t.decl_node, funcs, events, structs) @classmethod def from_InterfaceDef(cls, node: vy_ast.InterfaceDef) -> "InterfaceT": @@ -251,11 +263,13 @@ def from_InterfaceDef(cls, node: vy_ast.InterfaceDef) -> "InterfaceT": events: list = [] structs: list = [] - return cls._from_lists(node.name, functions, events, structs) + return cls._from_lists(node.name, node, functions, events, structs) # Datatype to store all module information. class ModuleT(VyperType): + typeclass = "module" + _attribute_in_annotation = True _invalid_locations = ( DataLocation.CALLDATA, @@ -318,6 +332,10 @@ def __eq__(self, other): def __hash__(self): return hash(id(self)) + @property + def decl_node(self) -> Optional[vy_ast.VyperNode]: # type: ignore[override] + return self._module + def get_type_member(self, key: str, node: vy_ast.VyperNode) -> "VyperType": return self._helper.get_member(key, node) diff --git a/vyper/semantics/types/primitives.py b/vyper/semantics/types/primitives.py index 66efabd1db..e3a5d7f834 100644 --- a/vyper/semantics/types/primitives.py +++ b/vyper/semantics/types/primitives.py @@ -55,6 +55,8 @@ def validate_literal(self, node: vy_ast.Constant) -> None: # one-word bytesM with m possible bytes set, e.g. bytes1..bytes32 class BytesM_T(_PrimT): + typeclass = "bytes_m" + _valid_literal = (vy_ast.Hex,) _equality_attrs = ("m",) @@ -231,6 +233,8 @@ class IntegerT(NumericT): Is the value signed? """ + typeclass = "integer" + _valid_literal = (vy_ast.Int,) _equality_attrs = ("is_signed", "bits") @@ -307,6 +311,8 @@ def SINT(bits): class DecimalT(NumericT): + typeclass = "decimal" + _bits = 168 # TODO generalize _decimal_places = 10 # TODO generalize _id = "decimal" diff --git a/vyper/semantics/types/subscriptable.py b/vyper/semantics/types/subscriptable.py index 635a1631a2..e6e8971087 100644 --- a/vyper/semantics/types/subscriptable.py +++ b/vyper/semantics/types/subscriptable.py @@ -41,7 +41,8 @@ def validate_index_type(self, node): class HashMapT(_SubscriptableT): - _id = "HashMap" + typeclass = "hashmap" + _id = "HashMap" # CMC 2024-03-03 maybe this would be better as repr(self) _equality_attrs = ("key_type", "value_type") @@ -152,6 +153,10 @@ class SArrayT(_SequenceT): Static array type """ + typeclass = "static_array" + + _id = "$SArray" + def __init__(self, value_type: VyperType, length: int) -> None: super().__init__(value_type, length) @@ -217,9 +222,12 @@ class DArrayT(_SequenceT): Dynamic array type """ + typeclass = "dynamic_array" + _valid_literal = (vy_ast.List,) _as_array = True - _id = "DynArray" + + _id = "DynArray" # CMC 2024-03-03 maybe this would be better as repr(self) def __init__(self, value_type: VyperType, length: int) -> None: super().__init__(value_type, length) @@ -306,7 +314,10 @@ class TupleT(VyperType): This class is used to represent multiple return values from functions. """ + typeclass = "tuple" + _equality_attrs = ("members",) + _id = "$Tuple" # note: docs say that tuples are not instantiable but they # are in fact instantiable and the codegen works. if we diff --git a/vyper/semantics/types/user.py b/vyper/semantics/types/user.py index 8af229337b..a6ee646e62 100644 --- a/vyper/semantics/types/user.py +++ b/vyper/semantics/types/user.py @@ -46,6 +46,8 @@ def __hash__(self): # note: flag behaves a lot like uint256, or uints in general. class FlagT(_UserType): + typeclass = "flag" + # this is a carveout because currently we allow dynamic arrays of # flags, but not static arrays of flags _as_darray = True @@ -163,6 +165,8 @@ class EventT(_UserType): Name of the event. """ + typeclass = "event" + _invalid_locations = tuple(iter(DataLocation)) # not instantiable in any location def __init__( @@ -180,6 +184,10 @@ def __init__( self.decl_node = decl_node + @property + def _id(self): + return self.name + # backward compatible @property def arguments(self): @@ -292,6 +300,7 @@ def to_toplevel_abi_dict(self) -> list[dict]: class StructT(_UserType): + typeclass = "struct" _as_array = True def __init__(self, _id, members, ast_def=None): diff --git a/vyper/utils.py b/vyper/utils.py index f1e4352d57..ba615e58d7 100644 --- a/vyper/utils.py +++ b/vyper/utils.py @@ -2,6 +2,8 @@ import contextlib import decimal import enum +import functools +import hashlib import sys import time import traceback @@ -159,6 +161,11 @@ def __setattr__(self, name, value): keccak256 = lambda x: _sha3.sha3_256(x).digest() # noqa: E731 +@functools.lru_cache(maxsize=512) +def sha256sum(s: str) -> str: + return hashlib.sha256(s.encode("utf-8")).digest().hex() + + # Converts four bytes to an integer def fourbytes_to_int(inp): return (inp[0] << 24) + (inp[1] << 16) + (inp[2] << 8) + inp[3] From 39027dc8db9c7d1a0ffb0fe37c4bb375408f1a62 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Tue, 12 Mar 2024 08:30:32 +0900 Subject: [PATCH 2/5] docs: typo in test comment (#3850) minor fix --- tests/functional/builtins/codegen/test_convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/builtins/codegen/test_convert.py b/tests/functional/builtins/codegen/test_convert.py index 559e1448ef..73b24de8a5 100644 --- a/tests/functional/builtins/codegen/test_convert.py +++ b/tests/functional/builtins/codegen/test_convert.py @@ -224,7 +224,7 @@ def _padconvert(val_bits, direction, n, padding_byte=None): """ Takes the ABI representation of a value, and convert the padding if needed. If fill_zeroes is false, the two halves of the bytestring are just swapped - and the dirty bytes remain dirty. If fill_zeroes is true, the the padding + and the dirty bytes remain dirty. If fill_zeroes is true, the padding bytes get set to 0 """ assert len(val_bits) == 32 From 246f4a7e089a3f8ff6dff79a52b627d4fa68c1c5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Mar 2024 11:11:36 -0400 Subject: [PATCH 3/5] feat[tool]: add `node_id` map to source map (#3811) this commit adds a new, AST-based map to the source map which links program counters (pcs) directly back to the AST output. this should improve the ability of third parties to implement source code integrations (debuggers, storage map tracers, etc). refactors: - get rid of `vyper.codegen.core.getpos()` - rename `IRnode.source_pos` to `IRnode.ast_source` - refactor a couple places in codegen which were passing `IRnode`s to the `Expr` constructor - rewrote the source map compression routine a bit. it might have gotten broken but at this point the compressed source map does not seem widely used. --- docs/compiling-a-contract.rst | 5 +- .../unit/cli/vyper_json/test_compile_json.py | 26 ++++-- .../cli/vyper_json/test_output_selection.py | 10 +++ tests/unit/compiler/test_source_map.py | 58 +++++++++--- vyper/ast/nodes.py | 2 +- vyper/ast/nodes.pyi | 4 + vyper/builtins/_utils.py | 8 +- vyper/cli/vyper_json.py | 9 +- vyper/codegen/core.py | 9 -- vyper/codegen/expr.py | 43 +++++---- .../function_definitions/external_function.py | 12 +-- vyper/codegen/ir_node.py | 26 +++--- vyper/codegen/stmt.py | 41 +++------ vyper/compiler/output.py | 88 ++++++++++++------- vyper/ir/compile_ir.py | 54 ++++++------ vyper/ir/optimizer.py | 12 +-- vyper/semantics/analysis/getters.py | 3 +- 17 files changed, 239 insertions(+), 171 deletions(-) diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index c4e8bad636..83571203e8 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -275,11 +275,14 @@ The following example describes the expected input format of ``vyper-json``. Com // evm.bytecode.opcodes - Opcodes list // evm.deployedBytecode.object - Deployed bytecode object // evm.deployedBytecode.opcodes - Deployed opcodes list - // evm.deployedBytecode.sourceMap - Deployed source mapping (useful for debugging) + // evm.deployedBytecode.sourceMap - Solidity-style source mapping + // evm.deployedBytecode.sourceMapFull - Deployed source mapping (useful for debugging) // evm.methodIdentifiers - The list of function hashes // // Using `evm`, `evm.bytecode`, etc. will select every target part of that output. // Additionally, `*` can be used as a wildcard to request everything. + // Note that the sourceMapFull.pc_ast_map is the recommended source map to use; + // the other types are included for legacy and compatibility reasons. // "outputSelection": { "*": ["evm.bytecode", "abi"], // Enable the abi and bytecode outputs for every single contract diff --git a/tests/unit/cli/vyper_json/test_compile_json.py b/tests/unit/cli/vyper_json/test_compile_json.py index e5f7384068..4fe2111f43 100644 --- a/tests/unit/cli/vyper_json/test_compile_json.py +++ b/tests/unit/cli/vyper_json/test_compile_json.py @@ -151,7 +151,11 @@ def test_compile_json(input_json, input_bundle): for source_id, contract_name in [(0, "foo"), (2, "library"), (3, "bar")]: path = f"contracts/{contract_name}.vy" data = compile_code_results[path] - assert output_json["sources"][path] == {"id": source_id, "ast": data["ast_dict"]["ast"]} + assert output_json["sources"][path] == { + "id": source_id, + "ast": data["ast_dict"]["ast"], + "annotated_ast": data["annotated_ast_dict"]["ast"], + } assert output_json["contracts"][path][contract_name] == { "abi": data["abi"], "devdoc": data["devdoc"], @@ -260,15 +264,25 @@ def test_exc_handler_to_dict_compiler(input_json): def test_source_ids_increment(input_json): - input_json["settings"]["outputSelection"] = {"*": ["evm.deployedBytecode.sourceMap"]} + input_json["settings"]["outputSelection"] = {"*": ["ast", "evm.deployedBytecode.sourceMapFull"]} result = compile_json(input_json) def get(filename, contractname): - return result["contracts"][filename][contractname]["evm"]["deployedBytecode"]["sourceMap"] + ast = result["sources"][filename]["ast"] + ret = ast["source_id"] + + # grab it via source map to sanity check + contract_info = result["contracts"][filename][contractname]["evm"] + pc_ast_map = contract_info["deployedBytecode"]["sourceMapFull"]["pc_ast_map"] + pc_item = next(iter(pc_ast_map.values())) + source_id, node_id = pc_item + assert ret == source_id + + return ret - assert get("contracts/foo.vy", "foo").startswith("-1:-1:0") - assert get("contracts/library.vy", "library").startswith("-1:-1:2") - assert get("contracts/bar.vy", "bar").startswith("-1:-1:3") + assert get("contracts/foo.vy", "foo") == 0 + assert get("contracts/library.vy", "library") == 2 + assert get("contracts/bar.vy", "bar") == 3 def test_relative_import_paths(input_json): diff --git a/tests/unit/cli/vyper_json/test_output_selection.py b/tests/unit/cli/vyper_json/test_output_selection.py index 5383190a66..f7fbfe673c 100644 --- a/tests/unit/cli/vyper_json/test_output_selection.py +++ b/tests/unit/cli/vyper_json/test_output_selection.py @@ -45,6 +45,16 @@ def test_star(): assert result == {PurePath("foo.vy"): expected, PurePath("bar.vy"): expected} +def test_ast(): + input_json = { + "sources": {"foo.vy": ""}, + "settings": {"outputSelection": {"foo.vy": ["ast", "annotated_ast"]}}, + } + expected = sorted([TRANSLATE_MAP[k] for k in ["ast", "annotated_ast"]]) + result = get_output_formats(input_json) + assert result == {PurePath("foo.vy"): expected} + + def test_evm(): input_json = { "sources": {"foo.vy": ""}, diff --git a/tests/unit/compiler/test_source_map.py b/tests/unit/compiler/test_source_map.py index 5b478dd2aa..04bd141185 100644 --- a/tests/unit/compiler/test_source_map.py +++ b/tests/unit/compiler/test_source_map.py @@ -1,14 +1,18 @@ +from collections import namedtuple + from vyper.compiler import compile_code from vyper.compiler.output import _compress_source_map from vyper.compiler.utils import expand_source_map TEST_CODE = """ +x: public(uint256) + @internal def _baz(a: int128) -> int128: b: int128 = a for i: int128 in range(2, 5): b *= i - if b > 31337: + if b > 31336 + 1: break return b @@ -82,22 +86,19 @@ def update_foo(): def test_compress_source_map(): - code = """ -@external -def foo() -> uint256: - return 42 - """ + # mock the required VyperNode fields in compress_source_map + # fake_node = namedtuple("fake_node", ("lineno", "col_offset", "end_lineno", "end_col_offset")) + fake_node = namedtuple("fake_node", ["src"]) + compressed = _compress_source_map( - code, {"0": None, "2": (2, 0, 4, 13), "3": (2, 0, 2, 8), "5": (2, 0, 2, 8)}, {"3": "o"}, 2 + {2: fake_node("-1:-1:-1"), 3: fake_node("1:45"), 5: fake_node("45:49")}, {3: "o"}, 6 ) - assert compressed == "-1:-1:2:-;1:45;:8::o;" + assert compressed == "-1:-1:-1;-1:-1:-1;-1:-1:-1;1:45:o;-1:-1:-1;45:49" def test_expand_source_map(): - compressed = "-1:-1:0:-;;13:42:1;:21;::0:o;:::-;1::1;" + compressed = "13:42:1;:21;::0:o;:::-;1::1;" expanded = [ - [-1, -1, 0, "-"], - [-1, -1, 0, None], [13, 42, 1, None], [13, 21, 1, None], [13, 21, 0, "o"], @@ -105,3 +106,38 @@ def test_expand_source_map(): [1, 21, 1, None], ] assert expand_source_map(compressed) == expanded + + +def _construct_node_id_map(ast_struct): + if isinstance(ast_struct, dict): + ret = {} + if "node_id" in ast_struct: + ret[ast_struct["node_id"]] = ast_struct + for item in ast_struct.values(): + ret.update(_construct_node_id_map(item)) + return ret + + elif isinstance(ast_struct, list): + ret = {} + for item in ast_struct: + ret.update(_construct_node_id_map(item)) + return ret + + else: + return {} + + +def test_node_id_map(): + code = TEST_CODE + out = compile_code(code, output_formats=["annotated_ast_dict", "source_map", "ir"]) + assert out["source_map"]["pc_ast_map_item_keys"] == ("source_id", "node_id") + + pc_ast_map = out["source_map"]["pc_ast_map"] + + ast_node_map = _construct_node_id_map(out["annotated_ast_dict"]) + + for pc, (source_id, node_id) in pc_ast_map.items(): + assert isinstance(pc, int), pc + assert isinstance(source_id, int), source_id + assert isinstance(node_id, int), node_id + assert node_id in ast_node_map diff --git a/vyper/ast/nodes.py b/vyper/ast/nodes.py index 2ca199bd7e..02c7e15686 100644 --- a/vyper/ast/nodes.py +++ b/vyper/ast/nodes.py @@ -146,7 +146,7 @@ def _to_node(obj, parent): if isinstance(obj, VyperNode): # if object is already a vyper node, make sure the parent is set correctly # and fix any missing source offsets - obj._parent = parent + obj.set_parent(parent) for field_name in NODE_SRC_ATTRIBUTES: if getattr(obj, field_name) is None: setattr(obj, field_name, getattr(parent, field_name, None)) diff --git a/vyper/ast/nodes.pyi b/vyper/ast/nodes.pyi index 4ebb61e76e..f673bb765c 100644 --- a/vyper/ast/nodes.pyi +++ b/vyper/ast/nodes.pyi @@ -17,6 +17,10 @@ def get_node( class VyperNode: full_source_code: str = ... node_source_code: str = ... + lineno: int = ... + col_offset: int = ... + end_lineno: int = ... + end_col_offset: int = ... _metadata: dict = ... _original_node: Optional[VyperNode] = ... def __init__(self, parent: Optional[VyperNode] = ..., **kwargs: Any) -> None: ... diff --git a/vyper/builtins/_utils.py b/vyper/builtins/_utils.py index 3fad225b48..0ee7ecd0b7 100644 --- a/vyper/builtins/_utils.py +++ b/vyper/builtins/_utils.py @@ -7,10 +7,10 @@ from vyper.semantics.types.module import ModuleT -def _strip_source_pos(ir_node): - ir_node.source_pos = None +def _strip_ast_source(ir_node): + ir_node.ast_source = None for x in ir_node.args: - _strip_source_pos(x) + _strip_ast_source(x) def generate_inline_function(code, variables, variables_2, memory_allocator): @@ -38,5 +38,5 @@ def generate_inline_function(code, variables, variables_2, memory_allocator): # NOTE if we ever use this for inlining user-code, it would make # sense to fix the offsets of the source positions in the generated # code instead of stripping them. - _strip_source_pos(generated_ir) + _strip_ast_source(generated_ir) return new_context, generated_ir diff --git a/vyper/cli/vyper_json.py b/vyper/cli/vyper_json.py index 032d7ebe64..21073cabeb 100755 --- a/vyper/cli/vyper_json.py +++ b/vyper/cli/vyper_json.py @@ -17,6 +17,7 @@ TRANSLATE_MAP = { "abi": "abi", "ast": "ast_dict", + "annotated_ast": "annotated_ast_dict", "devdoc": "devdoc", "evm.methodIdentifiers": "method_identifiers", "evm.bytecode.object": "bytecode", @@ -313,8 +314,12 @@ def format_to_output_dict(compiler_data: dict) -> dict: for path, data in compiler_data.items(): path = str(path) # Path breaks json serializability output_dict["sources"][path] = {"id": data["source_id"]} - if "ast_dict" in data: - output_dict["sources"][path]["ast"] = data["ast_dict"]["ast"] + + for k in ("ast_dict", "annotated_ast_dict"): + if k in data: + # un-translate the key + k2 = k.removesuffix("_dict") + output_dict["sources"][path][k2] = data[k]["ast"] name = PurePath(path).stem output_dict["contracts"][path] = {name: {}} diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index ecf05d1a49..2cb2876088 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -432,15 +432,6 @@ def pop_dyn_array(darray_node, return_popped_item): return IRnode.from_list(b1.resolve(b2.resolve(ret)), typ=typ, location=location) -def getpos(node): - return ( - node.lineno, - node.col_offset, - getattr(node, "end_lineno", None), - getattr(node, "end_col_offset", None), - ) - - # add an offset to a pointer, keeping location and encoding info def add_ofst(ptr, ofst): ret = ["add", ptr, ofst] diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index d0c5154cbe..7c39a4f5cf 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -13,7 +13,6 @@ ensure_in_memory, get_dyn_array_count, get_element_ptr, - getpos, is_array_like, is_bytes_m_type, is_flag_type, @@ -72,13 +71,6 @@ class Expr: # TODO: Once other refactors are made reevaluate all inline imports def __init__(self, node, context, is_stmt=False): - if isinstance(node, IRnode): - # this is a kludge for parse_AugAssign to pass in IRnodes - # directly. - # TODO fixme! - self.ir_node = node - return - assert isinstance(node, vy_ast.VyperNode) if node.has_folded_value: node = node.get_folded_value() @@ -94,7 +86,7 @@ def __init__(self, node, context, is_stmt=False): assert isinstance(self.ir_node, IRnode), self.ir_node self.ir_node.annotation = self.expr.get("node_source_code") - self.ir_node.source_pos = getpos(self.expr) + self.ir_node.ast_source = self.expr def parse_Int(self): typ = self.expr._metadata["type"] @@ -382,7 +374,14 @@ def parse_BinOp(self): left = Expr.parse_value_expr(self.expr.left, self.context) right = Expr.parse_value_expr(self.expr.right, self.context) - is_shift_op = isinstance(self.expr.op, (vy_ast.LShift, vy_ast.RShift)) + return Expr.handle_binop(self.expr.op, left, right, self.context) + + @classmethod + def handle_binop(cls, op, left, right, context): + assert not left.is_pointer + assert not right.is_pointer + + is_shift_op = isinstance(op, (vy_ast.LShift, vy_ast.RShift)) if is_shift_op: assert is_numeric_type(left.typ) @@ -391,25 +390,25 @@ def parse_BinOp(self): # Sanity check - ensure that we aren't dealing with different types # This should be unreachable due to the type check pass if left.typ != right.typ: - raise TypeCheckFailure(f"unreachable, {left.typ} != {right.typ}", self.expr) + raise TypeCheckFailure(f"unreachable: {left.typ} != {right.typ}") assert is_numeric_type(left.typ) or is_flag_type(left.typ) out_typ = left.typ - if isinstance(self.expr.op, vy_ast.BitAnd): + if isinstance(op, vy_ast.BitAnd): return IRnode.from_list(["and", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.BitOr): + if isinstance(op, vy_ast.BitOr): return IRnode.from_list(["or", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.BitXor): + if isinstance(op, vy_ast.BitXor): return IRnode.from_list(["xor", left, right], typ=out_typ) - if isinstance(self.expr.op, vy_ast.LShift): + if isinstance(op, vy_ast.LShift): new_typ = left.typ if new_typ.bits != 256: # TODO implement me. ["and", 2**bits - 1, shl(right, left)] raise TypeCheckFailure("unreachable") return IRnode.from_list(shl(right, left), typ=new_typ) - if isinstance(self.expr.op, vy_ast.RShift): + if isinstance(op, vy_ast.RShift): new_typ = left.typ if new_typ.bits != 256: # TODO implement me. promote_signed_int(op(right, left), bits) @@ -421,17 +420,17 @@ def parse_BinOp(self): assert is_numeric_type(left.typ) with left.cache_when_complex("x") as (b1, x), right.cache_when_complex("y") as (b2, y): - if isinstance(self.expr.op, vy_ast.Add): + if isinstance(op, vy_ast.Add): ret = arithmetic.safe_add(x, y) - elif isinstance(self.expr.op, vy_ast.Sub): + elif isinstance(op, vy_ast.Sub): ret = arithmetic.safe_sub(x, y) - elif isinstance(self.expr.op, vy_ast.Mult): + elif isinstance(op, vy_ast.Mult): ret = arithmetic.safe_mul(x, y) - elif isinstance(self.expr.op, (vy_ast.Div, vy_ast.FloorDiv)): + elif isinstance(op, (vy_ast.Div, vy_ast.FloorDiv)): ret = arithmetic.safe_div(x, y) - elif isinstance(self.expr.op, vy_ast.Mod): + elif isinstance(op, vy_ast.Mod): ret = arithmetic.safe_mod(x, y) - elif isinstance(self.expr.op, vy_ast.Pow): + elif isinstance(op, vy_ast.Pow): ret = arithmetic.safe_pow(x, y) else: # pragma: nocover raise CompilerPanic("Unreachable") diff --git a/vyper/codegen/function_definitions/external_function.py b/vyper/codegen/function_definitions/external_function.py index b380eab2ce..6f783bb9c5 100644 --- a/vyper/codegen/function_definitions/external_function.py +++ b/vyper/codegen/function_definitions/external_function.py @@ -1,6 +1,6 @@ from vyper.codegen.abi_encoder import abi_encoding_matches_vyper from vyper.codegen.context import Context, VariableRecord -from vyper.codegen.core import get_element_ptr, getpos, make_setter, needs_clamp +from vyper.codegen.core import get_element_ptr, make_setter, needs_clamp from vyper.codegen.expr import Expr from vyper.codegen.function_definitions.common import ( EntryPointInfo, @@ -39,7 +39,7 @@ def _register_function_args(func_t: ContractFunctionT, context: Context) -> list dst = IRnode(p, typ=arg.typ, location=MEMORY) copy_arg = make_setter(dst, arg_ir) - copy_arg.source_pos = getpos(arg.ast_source) + copy_arg.ast_source = arg.ast_source ret.append(copy_arg) else: assert abi_encoding_matches_vyper(arg.typ) @@ -101,18 +101,18 @@ def handler_for(calldata_kwargs, default_kwargs): rhs = get_element_ptr(calldata_kwargs_ofst, k, array_bounds_check=False) copy_arg = make_setter(lhs, rhs) - copy_arg.source_pos = getpos(arg_meta.ast_source) + copy_arg.ast_source = arg_meta.ast_source ret.append(copy_arg) for x in default_kwargs: dst = context.lookup_var(x.name).pos lhs = IRnode(dst, location=MEMORY, typ=x.typ) - lhs.source_pos = getpos(x.ast_source) + lhs.ast_source = x.ast_source kw_ast_val = func_t.default_values[x.name] # e.g. `3` in x: int = 3 rhs = Expr(kw_ast_val, context).ir_node copy_arg = make_setter(lhs, rhs) - copy_arg.source_pos = getpos(x.ast_source) + copy_arg.ast_source = x.ast_source ret.append(copy_arg) ret.append(["goto", func_t._ir_info.external_function_base_entry_label]) @@ -210,7 +210,7 @@ def generate_ir_for_external_function(code, compilation_target): # the ir which comprises the main body of the function, # besides any kwarg handling - func_common_ir = IRnode.from_list(["seq", body, exit_], source_pos=getpos(code)) + func_common_ir = IRnode.from_list(["seq", body, exit_], ast_source=code) tag_frame_info(func_t, context) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 1df2932da1..14e396ff74 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -3,8 +3,9 @@ import re from enum import Enum, auto from functools import cached_property -from typing import Any, List, Optional, Tuple, Union +from typing import Any, List, Optional, Union +import vyper.ast as vy_ast from vyper.compiler.settings import VYPER_COLOR_OUTPUT from vyper.evm.address_space import AddrSpace from vyper.evm.opcodes import get_ir_opcodes @@ -144,7 +145,7 @@ def __init__( args: List["IRnode"] = None, typ: VyperType = None, location: Optional[AddrSpace] = None, - source_pos: Optional[Tuple[int, int]] = None, + ast_source: Optional[vy_ast.VyperNode] = None, annotation: Optional[str] = None, error_msg: Optional[str] = None, mutable: bool = True, @@ -162,7 +163,7 @@ def __init__( assert isinstance(typ, VyperType) or typ is None, repr(typ) self.typ = typ self.location = location - self.source_pos = source_pos + self.ast_source = ast_source self.error_msg = error_msg self.annotation = annotation self.mutable = mutable @@ -478,11 +479,8 @@ def __eq__(self, other): and self.args == other.args and self.typ == other.typ and self.location == other.location - and self.source_pos == other.source_pos - and self.annotation == other.annotation and self.mutable == other.mutable and self.add_gas_estimate == other.add_gas_estimate - and self.valency == other.valency ) @property @@ -516,13 +514,13 @@ def repr(self) -> str: if self.repr_show_gas and self.gas: o += OKBLUE + "{" + ENDC + str(self.gas) + OKBLUE + "} " + ENDC # add gas for info. o += "[" + self._colorise_keywords(self.repr_value) - prev_lineno = self.source_pos[0] if self.source_pos else None + prev_lineno = self.ast_source.lineno if self.ast_source else None arg_lineno = None annotated = False has_inner_newlines = False for arg in self.args: o += ",\n " - arg_lineno = arg.source_pos[0] if arg.source_pos else None + arg_lineno = arg.ast_source.lineno if arg.ast_source else None if arg_lineno is not None and arg_lineno != prev_lineno and self.value in ("seq", "if"): o += f"# Line {(arg_lineno)}\n " prev_lineno = arg_lineno @@ -553,7 +551,7 @@ def from_list( obj: Any, typ: VyperType = None, location: Optional[AddrSpace] = None, - source_pos: Optional[Tuple[int, int]] = None, + ast_source: Optional[vy_ast.VyperNode] = None, annotation: Optional[str] = None, error_msg: Optional[str] = None, mutable: bool = True, @@ -570,8 +568,8 @@ def from_list( # the input gets modified. CC 20191121. if typ is not None: obj.typ = typ - if obj.source_pos is None: - obj.source_pos = source_pos + if obj.ast_source is None: + obj.ast_source = ast_source if obj.location is None: obj.location = location if obj.encoding is None: @@ -589,7 +587,7 @@ def from_list( annotation=annotation, mutable=mutable, add_gas_estimate=add_gas_estimate, - source_pos=source_pos, + ast_source=ast_source, encoding=encoding, error_msg=error_msg, is_self_call=is_self_call, @@ -598,12 +596,12 @@ def from_list( else: return cls( obj[0], - [cls.from_list(o, source_pos=source_pos) for o in obj[1:]], + [cls.from_list(o, ast_source=ast_source) for o in obj[1:]], typ, location=location, annotation=annotation, mutable=mutable, - source_pos=source_pos, + ast_source=ast_source, add_gas_estimate=add_gas_estimate, encoding=encoding, error_msg=error_msg, diff --git a/vyper/codegen/stmt.py b/vyper/codegen/stmt.py index f658dc92b9..1da31d3bda 100644 --- a/vyper/codegen/stmt.py +++ b/vyper/codegen/stmt.py @@ -9,7 +9,6 @@ clamp_le, get_dyn_array_count, get_element_ptr, - getpos, make_byte_array_copier, make_setter, zero_pad, @@ -42,7 +41,7 @@ def __init__(self, node: vy_ast.VyperNode, context: Context) -> None: assert isinstance(self.ir_node, IRnode), self.ir_node self.ir_node.annotation = self.stmt.get("node_source_code") - self.ir_node.source_pos = getpos(self.stmt) + self.ir_node.ast_source = self.stmt def parse_Expr(self): return Expr(self.stmt.value, self.context, is_stmt=True).ir_node @@ -197,20 +196,19 @@ def _parse_For_range(self): assert "type" in self.stmt.target.target._metadata target_type = self.stmt.target.target._metadata["type"] - # Get arg0 range_call: vy_ast.Call = self.stmt.iter assert isinstance(range_call, vy_ast.Call) - args_len = len(range_call.args) - if args_len == 1: - arg0, arg1 = (IRnode.from_list(0, typ=target_type), range_call.args[0]) - elif args_len == 2: - arg0, arg1 = range_call.args - else: # pragma: nocover - raise TypeCheckFailure("unreachable: bad # of arguments to range()") with self.context.range_scope(): - start = Expr.parse_value_expr(arg0, self.context) - end = Expr.parse_value_expr(arg1, self.context) + args = [Expr.parse_value_expr(arg, self.context) for arg in range_call.args] + if len(args) == 1: + start = IRnode.from_list(0, typ=target_type) + end = args[0] + elif len(args) == 2: + start, end = args + else: # pragma: nocover + raise TypeCheckFailure("unreachable") + kwargs = { s.arg: Expr.parse_value_expr(s.value, self.context) for s in range_call.keywords } @@ -300,8 +298,8 @@ def _parse_For_list(self): def parse_AugAssign(self): target = self._get_target(self.stmt.target) + right = Expr.parse_value_expr(self.stmt.value, self.context) - sub = Expr.parse_value_expr(self.stmt.value, self.context) if not target.typ._is_prim_word: # because of this check, we do not need to check for # make_setter references lhs<->rhs as in parse_Assign - @@ -309,20 +307,9 @@ def parse_AugAssign(self): raise TypeCheckFailure("unreachable") with target.cache_when_complex("_loc") as (b, target): - rhs = Expr.parse_value_expr( - vy_ast.BinOp( - left=IRnode.from_list(LOAD(target), typ=target.typ), - right=sub, - op=self.stmt.op, - lineno=self.stmt.lineno, - col_offset=self.stmt.col_offset, - end_lineno=self.stmt.end_lineno, - end_col_offset=self.stmt.end_col_offset, - node_source_code=self.stmt.get("node_source_code"), - ), - self.context, - ) - return b.resolve(STORE(target, rhs)) + left = IRnode.from_list(LOAD(target), typ=target.typ) + new_val = Expr.handle_binop(self.stmt.op, left, right, self.context) + return b.resolve(STORE(target, new_val)) def parse_Continue(self): return IRnode.from_list("continue") diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 707c99291b..de8e34370d 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -1,9 +1,7 @@ import warnings -from collections import OrderedDict, deque +from collections import deque from pathlib import PurePath -import asttokens - from vyper.ast import ast_to_dict, parse_natspec from vyper.codegen.ir_node import IRnode from vyper.compiler.phases import CompilerData @@ -237,46 +235,72 @@ def _build_asm(asm_list): return output_string -def build_source_map_output(compiler_data: CompilerData) -> OrderedDict: - _, line_number_map = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_compiler_metadata=False - ) - # Sort line_number_map - out = OrderedDict() - for k in sorted(line_number_map.keys()): - out[k] = line_number_map[k] +def _build_node_identifier(ast_node): + assert ast_node.module_node is not None, type(ast_node) + return (ast_node.module_node.source_id, ast_node.node_id) - out["pc_pos_map_compressed"] = _compress_source_map( - compiler_data.source_code, out["pc_pos_map"], out["pc_jump_map"], compiler_data.source_id + +def build_source_map_output(compiler_data: CompilerData) -> dict: + """ + Generate source map output in various formats. Note that integrations + are encouraged to use pc_ast_map since the information it provides is + a superset of the other formats, and the other types are included + for legacy reasons. + """ + bytecode, pc_maps = compile_ir.assembly_to_evm( + compiler_data.assembly_runtime, insert_compiler_metadata=False ) - out["pc_pos_map"] = dict((k, v) for k, v in out["pc_pos_map"].items() if v) + # sort the pc maps alphabetically + # CMC 2024-03-09 is this really necessary? + out = {} + for k in sorted(pc_maps.keys()): + out[k] = pc_maps[k] + + ast_map = out.pop("pc_raw_ast_map") + + assert isinstance(ast_map, dict) # lint + if 0 not in ast_map: + # tag it with source id + ast_map[0] = compiler_data.annotated_vyper_module + + pc_pos_map = {k: compile_ir.getpos(v) for (k, v) in ast_map.items()} + node_id_map = {k: _build_node_identifier(v) for (k, v) in ast_map.items()} + compressed_map = _compress_source_map(ast_map, out["pc_jump_map"], len(bytecode)) + out["pc_pos_map_compressed"] = compressed_map + out["pc_pos_map"] = pc_pos_map + out["pc_ast_map"] = node_id_map + # hint to consumers what the fields in pc_ast_map mean + out["pc_ast_map_item_keys"] = ("source_id", "node_id") return out -def _compress_source_map(code, pos_map, jump_map, source_id): - linenos = asttokens.LineNumbers(code) - ret = [f"-1:-1:{source_id}:-"] - last_pos = [-1, -1, source_id] +# generate a solidity-style source map. this functionality is deprecated +# in favor of pc_ast_map, and may not be maintained to the same level +# as pc_ast_map. +def _compress_source_map(ast_map, jump_map, bytecode_size): + ret = [] - for pc in sorted(pos_map)[1:]: - current_pos = [-1, -1, source_id] - if pos_map[pc]: - current_pos[0] = linenos.line_to_offset(*pos_map[pc][:2]) - current_pos[1] = linenos.line_to_offset(*pos_map[pc][2:]) - current_pos[0] + jump_map = jump_map.copy() + ast_map = ast_map.copy() - if pc in jump_map: - current_pos.append(jump_map[pc]) + for pc in range(bytecode_size): + if pc in ast_map: + ast_node = ast_map.pop(pc) + # ast_node.src conveniently has the current position in + # the correct, compressed format + current_pos = [ast_node.src] + else: + current_pos = ["-1:-1:-1"] - for i in range(2, -1, -1): - if current_pos[i] != last_pos[i]: - last_pos[i] = current_pos[i] - elif len(current_pos) == i + 1: - current_pos.pop() - else: - current_pos[i] = "" + if pc in jump_map: + jump_type = jump_map.pop(pc) + current_pos.append(jump_type) ret.append(":".join(str(i) for i in current_pos)) + assert len(ast_map) == 0, ast_map + assert len(jump_map) == 0, jump_map + return ";".join(ret) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index ac8631ff7b..e4a4cc60f7 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -54,8 +54,8 @@ def mksymbol(name=""): return f"_sym_{name}{_next_symbol}" -def mkdebug(pc_debugger, source_pos): - i = Instruction("DEBUG", source_pos) +def mkdebug(pc_debugger, ast_source): + i = Instruction("DEBUG", ast_source) i.pc_debugger = pc_debugger return [i] @@ -133,7 +133,7 @@ def _rewrite_return_sequences(ir_node, label_params=None): # works for both internal and external exit_to more_args = ["pass" if t.value == "return_pc" else t for t in args[1:]] _t.append(["goto", dest] + more_args) - ir_node.args = IRnode.from_list(_t, source_pos=ir_node.source_pos).args + ir_node.args = IRnode.from_list(_t, ast_source=ir_node.ast_source).args if ir_node.value == "label": label_params = set(t.value for t in ir_node.args[1].args) @@ -187,14 +187,11 @@ class Instruction(str): def __new__(cls, sstr, *args, **kwargs): return super().__new__(cls, sstr) - def __init__(self, sstr, source_pos=None, error_msg=None): + def __init__(self, sstr, ast_source=None, error_msg=None): self.error_msg = error_msg self.pc_debugger = False - if source_pos is not None: - self.lineno, self.col_offset, self.end_lineno, self.end_col_offset = source_pos - else: - self.lineno, self.col_offset, self.end_lineno, self.end_col_offset = [None] * 4 + self.ast_source = ast_source def apply_line_numbers(func): @@ -204,7 +201,7 @@ def apply_line_no_wrapper(*args, **kwargs): ret = func(*args, **kwargs) new_ret = [ - Instruction(i, code.source_pos, code.error_msg) + Instruction(i, code.ast_source, code.error_msg) if isinstance(i, str) and not isinstance(i, Instruction) else i for i in ret @@ -765,37 +762,38 @@ def _height_of(witharg): # inject debug opcode. elif code.value == "debugger": - return mkdebug(pc_debugger=False, source_pos=code.source_pos) + return mkdebug(pc_debugger=False, ast_source=code.ast_source) # inject debug opcode. elif code.value == "pc_debugger": - return mkdebug(pc_debugger=True, source_pos=code.source_pos) + return mkdebug(pc_debugger=True, ast_source=code.ast_source) else: # pragma: no cover raise ValueError(f"Weird code element: {type(code)} {code}") -def note_line_num(line_number_map, item, pos): - # Record line number attached to pos. - if isinstance(item, Instruction): - if item.lineno is not None: - offsets = (item.lineno, item.col_offset, item.end_lineno, item.end_col_offset) - else: - offsets = None +def getpos(node): + return (node.lineno, node.col_offset, node.end_lineno, node.end_col_offset) - line_number_map["pc_pos_map"][pos] = offsets + +def note_line_num(line_number_map, pc, item): + # Record AST attached to pc + if isinstance(item, Instruction): + if (ast_node := item.ast_source) is not None: + ast_node = ast_node.get_original_node() + if hasattr(ast_node, "node_id"): + line_number_map["pc_raw_ast_map"][pc] = ast_node if item.error_msg is not None: - line_number_map["error_map"][pos] = item.error_msg + line_number_map["error_map"][pc] = item.error_msg - added_line_breakpoint = note_breakpoint(line_number_map, item, pos) - return added_line_breakpoint + note_breakpoint(line_number_map, pc, item) -def note_breakpoint(line_number_map, item, pos): - # Record line number attached to pos. +def note_breakpoint(line_number_map, pc, item): + # Record line number attached to pc if item == "DEBUG": # Is PC debugger, create PC breakpoint. if item.pc_debugger: - line_number_map["pc_breakpoints"].add(pos) + line_number_map["pc_breakpoints"].add(pc) # Create line number breakpoint. else: line_number_map["breakpoints"].add(item.lineno + 1) @@ -1064,7 +1062,7 @@ def adjust_pc_maps(pc_maps, ofst): ret["breakpoints"] = pc_maps["breakpoints"].copy() ret["pc_breakpoints"] = {pc + ofst for pc in pc_maps["pc_breakpoints"]} ret["pc_jump_map"] = {k + ofst: v for (k, v) in pc_maps["pc_jump_map"].items()} - ret["pc_pos_map"] = {k + ofst: v for (k, v) in pc_maps["pc_pos_map"].items()} + ret["pc_raw_ast_map"] = {k + ofst: v for (k, v) in pc_maps["pc_raw_ast_map"].items()} ret["error_map"] = {k + ofst: v for (k, v) in pc_maps["error_map"].items()} return ret @@ -1171,7 +1169,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat "breakpoints": set(), "pc_breakpoints": set(), "pc_jump_map": {0: "-"}, - "pc_pos_map": {}, + "pc_raw_ast_map": {}, "error_map": {}, } @@ -1213,7 +1211,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadat # go through the code, resolving symbolic locations # (i.e. JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): - note_line_num(line_number_map, item, pc) + note_line_num(line_number_map, pc, item) if item == "DEBUG": continue # skip debug diff --git a/vyper/ir/optimizer.py b/vyper/ir/optimizer.py index 75e9b46783..7ff5390e4b 100644 --- a/vyper/ir/optimizer.py +++ b/vyper/ir/optimizer.py @@ -436,7 +436,7 @@ def _optimize(node: IRnode, parent: Optional[IRnode]) -> Tuple[bool, IRnode]: value = node.value typ = node.typ location = node.location - source_pos = node.source_pos + ast_source = node.ast_source error_msg = node.error_msg annotation = node.annotation add_gas_estimate = node.add_gas_estimate @@ -460,7 +460,7 @@ def finalize(val, args): ir_builder, typ=typ, location=location, - source_pos=source_pos, + ast_source=ast_source, error_msg=error_msg, annotation=annotation, add_gas_estimate=add_gas_estimate, @@ -552,7 +552,7 @@ def finalize(val, args): if _evm_int(argz[0]) == 0: raise StaticAssertionException( f"assertion found to fail at compile time. (hint: did you mean `raise`?) {node}", - source_pos, + ast_source, ) else: changed = True @@ -615,7 +615,7 @@ def _merge_memzero(argz): changed = True new_ir = IRnode.from_list( ["calldatacopy", initial_offset, "calldatasize", total_length], - source_pos=mstore_nodes[0].source_pos, + ast_source=mstore_nodes[0].ast_source, ) # replace first zero'ing operation with optimized node and remove the rest argz[idx] = new_ir @@ -658,7 +658,7 @@ def _rewrite_mstore_dload(argz): dst = arg.args[0] src = arg.args[1].args[0] len_ = 32 - argz[i] = IRnode.from_list(["dloadbytes", dst, src, len_], source_pos=arg.source_pos) + argz[i] = IRnode.from_list(["dloadbytes", dst, src, len_], ast_source=arg.ast_source) changed = True return changed @@ -716,7 +716,7 @@ def _merge_load(argz, _LOAD, _COPY, allow_overlap=True): changed = True new_ir = IRnode.from_list( [_COPY, initial_dst_offset, initial_src_offset, total_length], - source_pos=mstore_nodes[0].source_pos, + ast_source=mstore_nodes[0].ast_source, ) # replace first copy operation with optimized node and remove the rest argz[idx] = new_ir diff --git a/vyper/semantics/analysis/getters.py b/vyper/semantics/analysis/getters.py index bce64987da..ad5c8227cb 100644 --- a/vyper/semantics/analysis/getters.py +++ b/vyper/semantics/analysis/getters.py @@ -63,8 +63,7 @@ def generate_public_variable_getters(vyper_module: vy_ast.Module) -> None: # after iterating the input types, the remaining annotation node is our return type return_annotation = copy.copy(annotation) - # join everything together as a new `FunctionDef` node, annotate it - # with the type, and append it to the existing `Module` node + # join everything together as a new `FunctionDef` node expanded = vy_ast.FunctionDef( name=funcname, args=vy_ast.arguments(args=input_nodes, defaults=[]), From 9cfe7b4b6dc5db2ec0ca03b18517479f28a16791 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Mar 2024 11:14:48 -0400 Subject: [PATCH 4/5] feat[lang]: allow downcasting of bytestrings (#3832) this commit extends `convert()` to allow downcasting of Bytes/Strings, i.e. converting `Bytes[20]` to `Bytes[19]`. this improves the UX of bytestrings somewhat, since currently (prior to this commit) there is no type-safe way to decrease the size of a bytestring in vyper. it also prepares us a little bit for adding generic bytestrings inside the type system (`Bytes[...]`) which can only be user-instantiated by `convert`ing to a known length. --- .../builtins/codegen/test_convert.py | 61 +++++++++++++++++-- vyper/builtins/_convert.py | 30 +++++---- 2 files changed, 76 insertions(+), 15 deletions(-) diff --git a/tests/functional/builtins/codegen/test_convert.py b/tests/functional/builtins/codegen/test_convert.py index 73b24de8a5..ad1a616300 100644 --- a/tests/functional/builtins/codegen/test_convert.py +++ b/tests/functional/builtins/codegen/test_convert.py @@ -8,6 +8,7 @@ import eth.codecs.abi.exceptions import pytest +from vyper.compiler import compile_code from vyper.exceptions import InvalidLiteral, InvalidType, TypeMismatch from vyper.semantics.types import AddressT, BoolT, BytesM_T, BytesT, DecimalT, IntegerT, StringT from vyper.semantics.types.shortcuts import BYTES20_T, BYTES32_T, UINT, UINT160_T, UINT256_T @@ -560,14 +561,15 @@ def foo(x: {i_typ}) -> {o_typ}: assert_compile_failed(lambda: get_contract(code), TypeMismatch) -@pytest.mark.parametrize("typ", sorted(TEST_TYPES)) -def test_bytes_too_large_cases(get_contract, assert_compile_failed, typ): +@pytest.mark.parametrize("typ", sorted(BASE_TYPES)) +def test_bytes_too_large_cases(typ): code_1 = f""" @external def foo(x: Bytes[33]) -> {typ}: return convert(x, {typ}) """ - assert_compile_failed(lambda: get_contract(code_1), TypeMismatch) + with pytest.raises(TypeMismatch): + compile_code(code_1) bytes_33 = b"1" * 33 code_2 = f""" @@ -575,8 +577,59 @@ def foo(x: Bytes[33]) -> {typ}: def foo() -> {typ}: return convert({bytes_33}, {typ}) """ + with pytest.raises(TypeMismatch): + compile_code(code_2) - assert_compile_failed(lambda: get_contract(code_2, TypeMismatch)) + +@pytest.mark.parametrize("cls1,cls2", itertools.product((StringT, BytesT), (StringT, BytesT))) +def test_bytestring_conversions(cls1, cls2, get_contract, tx_failed): + typ1 = cls1(33) + typ2 = cls2(32) + + def bytestring(cls, string): + if cls == BytesT: + return string.encode("utf-8") + return string + + code_1 = f""" +@external +def foo(x: {typ1}) -> {typ2}: + return convert(x, {typ2}) + """ + c = get_contract(code_1) + + for i in range(33): # inclusive 32 + s = "1" * i + arg = bytestring(cls1, s) + out = bytestring(cls2, s) + assert c.foo(arg) == out + + with tx_failed(): + # TODO: sanity check it is convert which is reverting, not arg clamping + c.foo(bytestring(cls1, "1" * 33)) + + code_2_template = """ +@external +def foo() -> {typ}: + return convert({arg}, {typ}) + """ + + # test literals + for i in range(33): # inclusive 32 + s = "1" * i + arg = bytestring(cls1, s) + out = bytestring(cls2, s) + code = code_2_template.format(typ=typ2, arg=repr(arg)) + if cls1 == cls2: # ex.: can't convert "" to String[32] + with pytest.raises(InvalidType): + compile_code(code) + else: + c = get_contract(code) + assert c.foo() == out + + failing_code = code_2_template.format(typ=typ2, arg=bytestring(cls1, "1" * 33)) + with pytest.raises(TypeMismatch): + compile_code(failing_code) @pytest.mark.parametrize("n", range(1, 33)) diff --git a/vyper/builtins/_convert.py b/vyper/builtins/_convert.py index 98c4fa7219..aa53dee429 100644 --- a/vyper/builtins/_convert.py +++ b/vyper/builtins/_convert.py @@ -422,23 +422,31 @@ def to_address(expr, arg, out_typ): return IRnode.from_list(ret, out_typ) -# question: should we allow bytesM -> String? -@_input_types(BytesT) -def to_string(expr, arg, out_typ): - _check_bytes(expr, arg, out_typ, out_typ.maxlen) +def _cast_bytestring(expr, arg, out_typ): + # ban converting Bytes[20] to Bytes[21] + if isinstance(arg.typ, out_typ.__class__) and arg.typ.maxlen <= out_typ.maxlen: + _FAIL(arg.typ, out_typ, expr) + # can't downcast literals with known length (e.g. b"abc" to Bytes[2]) + if isinstance(expr, vy_ast.Constant) and arg.typ.maxlen > out_typ.maxlen: + _FAIL(arg.typ, out_typ, expr) + ret = ["seq"] + if out_typ.maxlen < arg.typ.maxlen: + ret.append(["assert", ["le", get_bytearray_length(arg), out_typ.maxlen]]) + ret.append(arg) # NOTE: this is a pointer cast - return IRnode.from_list(arg, typ=out_typ) + return IRnode.from_list(ret, typ=out_typ, location=arg.location, encoding=arg.encoding) -@_input_types(StringT) -def to_bytes(expr, arg, out_typ): - _check_bytes(expr, arg, out_typ, out_typ.maxlen) +# question: should we allow bytesM -> String? +@_input_types(BytesT, StringT) +def to_string(expr, arg, out_typ): + return _cast_bytestring(expr, arg, out_typ) - # TODO: more casts - # NOTE: this is a pointer cast - return IRnode.from_list(arg, typ=out_typ) +@_input_types(StringT, BytesT) +def to_bytes(expr, arg, out_typ): + return _cast_bytestring(expr, arg, out_typ) @_input_types(IntegerT) From a9ee64149312cd22f324786fb3712bed6d3a663a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Mar 2024 11:24:00 -0400 Subject: [PATCH 5/5] feat: drop istanbul and berlin support (#3843) this commit drops explicit support for the istanbul and berlin hard forks per the three year rule suggested in VIP 3365 - istanbul hard fork was 2019-12-09, over 4 years ago - berlin hard fork was 2021-04-15, which should be 3 years ago by the time of 0.4.0 release (or if we can release sooner, shortly after) this commit also changes the nonreentrant key values for cancun (since `PUSH0` takes 1 less byte in the bytecode). --- docs/compiling-a-contract.rst | 16 ++-------- tests/functional/syntax/test_self_balance.py | 5 +--- .../unit/cli/vyper_json/test_get_settings.py | 4 ++- tests/unit/compiler/test_opcodes.py | 14 ++++----- tests/unit/compiler/test_pre_parser.py | 4 +-- vyper/cli/vyper_json.py | 4 ++- vyper/codegen/expr.py | 11 +------ vyper/codegen/function_definitions/common.py | 10 +++---- vyper/evm/opcodes.py | 30 +++++++++---------- 9 files changed, 38 insertions(+), 60 deletions(-) diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index 83571203e8..fdcb8b7271 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -173,20 +173,10 @@ When using the JSON interface, you can include the ``"evmVersion"`` key within t Target Options -------------- -The following is a list of supported EVM versions, and changes in the compiler introduced with each version. Backward compatibility is not guaranteed between each version. +The following is a list of supported EVM versions, and changes in the compiler introduced with each version. Backward compatibility is not guaranteed between each version. In general, the compiler team maintains an informal policy that the compiler will support 3 years of hard fork rulesets, but this policy may be revisited as appropriate. -.. py:attribute:: istanbul - - - The ``CHAINID`` opcode is accessible via ``chain.id`` - - The ``SELFBALANCE`` opcode is used for calls to ``self.balance`` - - Gas estimates changed for ``SLOAD`` and ``BALANCE`` - -.. py:attribute:: berlin - - - Gas estimates changed for ``EXTCODESIZE``, ``EXTCODECOPY``, ``EXTCODEHASH``, ``SLOAD``, ``SSTORE``, ``CALL``, ``CALLCODE``, ``DELEGATECALL`` and ``STATICCALL`` - - Functions marked with ``@nonreentrant`` are protected with different values (3 and 2) than contracts targeting pre-berlin. - - ``BASEFEE`` is accessible via ``block.basefee`` +.. py:attribute:: london .. py:attribute:: paris @@ -247,7 +237,7 @@ The following example describes the expected input format of ``vyper-json``. Com }, // Optional "settings": { - "evmVersion": "shanghai", // EVM version to compile for. Can be istanbul, berlin, paris, shanghai (default) or cancun (experimental!). + "evmVersion": "shanghai", // EVM version to compile for. Can be london, paris, shanghai (default) or cancun (experimental!). // optional, optimization mode // defaults to "gas". can be one of "gas", "codesize", "none", // false and true (the last two are for backwards compatibility). diff --git a/tests/functional/syntax/test_self_balance.py b/tests/functional/syntax/test_self_balance.py index d22d8a2750..28cbd05453 100644 --- a/tests/functional/syntax/test_self_balance.py +++ b/tests/functional/syntax/test_self_balance.py @@ -21,10 +21,7 @@ def __default__(): """ settings = Settings(evm_version=evm_version) opcodes = compiler.compile_code(code, output_formats=["opcodes"], settings=settings)["opcodes"] - if EVM_VERSIONS[evm_version] >= EVM_VERSIONS["istanbul"]: - assert "SELFBALANCE" in opcodes - else: - assert "SELFBALANCE" not in opcodes + assert "SELFBALANCE" in opcodes c = get_contract_with_gas_estimation(code, evm_version=evm_version) w3.eth.send_transaction({"to": c.address, "value": 1337}) diff --git a/tests/unit/cli/vyper_json/test_get_settings.py b/tests/unit/cli/vyper_json/test_get_settings.py index 989d4565cd..975cb9d143 100644 --- a/tests/unit/cli/vyper_json/test_get_settings.py +++ b/tests/unit/cli/vyper_json/test_get_settings.py @@ -18,6 +18,8 @@ def test_unknown_evm(): "byzantium", "constantinople", "petersburg", + "istanbul", + "berlin", ], ) def test_early_evm(evm_version): @@ -25,6 +27,6 @@ def test_early_evm(evm_version): get_evm_version({"settings": {"evmVersion": evm_version}}) -@pytest.mark.parametrize("evm_version", ["istanbul", "berlin", "paris", "shanghai", "cancun"]) +@pytest.mark.parametrize("evm_version", ["london", "paris", "shanghai", "cancun"]) def test_valid_evm(evm_version): assert evm_version == get_evm_version({"settings": {"evmVersion": evm_version}}) diff --git a/tests/unit/compiler/test_opcodes.py b/tests/unit/compiler/test_opcodes.py index ed64f343c4..710348a274 100644 --- a/tests/unit/compiler/test_opcodes.py +++ b/tests/unit/compiler/test_opcodes.py @@ -38,10 +38,10 @@ def test_version_check(evm_version): assert opcodes.version_check(begin=evm_version) assert opcodes.version_check(end=evm_version) assert opcodes.version_check(begin=evm_version, end=evm_version) - if evm_version not in ("istanbul"): - assert not opcodes.version_check(end="istanbul") - istanbul_check = opcodes.version_check(begin="istanbul") - assert istanbul_check == (opcodes.EVM_VERSIONS[evm_version] >= opcodes.EVM_VERSIONS["istanbul"]) + if evm_version not in ("london",): + assert not opcodes.version_check(end="london") + london_check = opcodes.version_check(begin="london") + assert london_check == (opcodes.EVM_VERSIONS[evm_version] >= opcodes.EVM_VERSIONS["london"]) def test_get_opcodes(evm_version): @@ -50,11 +50,7 @@ def test_get_opcodes(evm_version): assert "CHAINID" in ops assert ops["CREATE2"][-1] == 32000 - if evm_version in ("london", "berlin", "paris", "shanghai", "cancun"): - assert ops["SLOAD"][-1] == 2100 - else: - assert evm_version == "istanbul" - assert ops["SLOAD"][-1] == 800 + assert ops["SLOAD"][-1] == 2100 if evm_version in ("shanghai", "cancun"): assert "PUSH0" in ops diff --git a/tests/unit/compiler/test_pre_parser.py b/tests/unit/compiler/test_pre_parser.py index 02076ed07e..128b6b16eb 100644 --- a/tests/unit/compiler/test_pre_parser.py +++ b/tests/unit/compiler/test_pre_parser.py @@ -90,10 +90,10 @@ def test(): def test_evm_version_check(assert_compile_failed): code = """ -#pragma evm-version berlin +#pragma evm-version london """ assert compile_code(code, settings=Settings(evm_version=None)) is not None - assert compile_code(code, settings=Settings(evm_version="berlin")) is not None + assert compile_code(code, settings=Settings(evm_version="london")) is not None # should fail if compile options indicate different evm version # from source pragma with pytest.raises(StructureException): diff --git a/vyper/cli/vyper_json.py b/vyper/cli/vyper_json.py index 21073cabeb..1f914e2dc9 100755 --- a/vyper/cli/vyper_json.py +++ b/vyper/cli/vyper_json.py @@ -144,8 +144,10 @@ def get_evm_version(input_dict: dict) -> Optional[str]: "spuriousDragon", "byzantium", "constantinople", + "istanbul", + "berlin", ): - raise JSONError("Vyper does not support pre-istanbul EVM versions") + raise JSONError("Vyper does not support pre-london EVM versions") if evm_version not in EVM_VERSIONS: raise JSONError(f"Unknown EVM version - '{evm_version}'") diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index 7c39a4f5cf..d7afe6c7f6 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -31,7 +31,6 @@ from vyper.exceptions import ( CodegenPanic, CompilerPanic, - EvmVersionException, StructureException, TypeCheckFailure, TypeMismatch, @@ -222,11 +221,7 @@ def parse_Attribute(self): if self.expr.attr == "balance": addr = Expr.parse_value_expr(self.expr.value, self.context) if addr.typ == AddressT(): - if ( - isinstance(self.expr.value, vy_ast.Name) - and self.expr.value.id == "self" - and version_check(begin="istanbul") - ): + if isinstance(self.expr.value, vy_ast.Name) and self.expr.value.id == "self": seq = ["selfbalance"] else: seq = ["balance", addr] @@ -302,10 +297,6 @@ def parse_Attribute(self): elif key == "tx.gasprice": return IRnode.from_list(["gasprice"], typ=UINT256_T) elif key == "chain.id": - if not version_check(begin="istanbul"): - raise EvmVersionException( - "chain.id is unavailable prior to istanbul ruleset", self.expr - ) return IRnode.from_list(["chainid"], typ=UINT256_T) # Other variables diff --git a/vyper/codegen/function_definitions/common.py b/vyper/codegen/function_definitions/common.py index d017ba7b81..a130f41565 100644 --- a/vyper/codegen/function_definitions/common.py +++ b/vyper/codegen/function_definitions/common.py @@ -150,14 +150,14 @@ def get_nonreentrant_lock(func_t): LOAD, STORE = "sload", "sstore" if version_check(begin="cancun"): LOAD, STORE = "tload", "tstore" - - if version_check(begin="berlin"): - # any nonzero values would work here (see pricing as of net gas + # for tload/tstore we don't need to care about net gas metering, + # choose small constants (e.g. 0 can be replaced by PUSH0) + final_value, temp_value = 0, 1 + else: + # any nonzero values can work here (see pricing as of net gas # metering); these values are chosen so that downgrading to the # 0,1 scheme (if it is somehow necessary) is safe. final_value, temp_value = 3, 2 - else: - final_value, temp_value = 0, 1 check_notset = ["assert", ["ne", temp_value, [LOAD, nkey]]] diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 767d634c89..48edf48f19 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -8,10 +8,10 @@ # 1. Fork rules go from oldest (lowest value) to newest (highest value). # 2. Fork versions aren't actually tied to anything. They are not a part of our # official API. *DO NOT USE THE VALUES FOR ANYTHING IMPORTANT* besides versioning. -# 3. Per VIP-3365, we support mainnet fork choice rules up to 1 year old +# 3. Per VIP-3365, we support mainnet fork choice rules up to 3 years old # (and may optionally have forward support for experimental/unreleased # fork choice rules) -_evm_versions = ("istanbul", "berlin", "london", "paris", "shanghai", "cancun") +_evm_versions = ("london", "paris", "shanghai", "cancun") EVM_VERSIONS: dict[str, int] = dict((v, i) for i, v in enumerate(_evm_versions)) @@ -22,7 +22,7 @@ # opcode as hex value # number of values removed from stack # number of values added to stack -# gas cost (istanbul, berlin, paris, shanghai, cancun) +# gas cost (london, paris, shanghai, cancun) OPCODES: OpcodeMap = { "STOP": (0x00, 0, 0, 0), "ADD": (0x01, 2, 1, 3), @@ -62,11 +62,11 @@ "CODESIZE": (0x38, 0, 1, 2), "CODECOPY": (0x39, 3, 0, 3), "GASPRICE": (0x3A, 0, 1, 2), - "EXTCODESIZE": (0x3B, 1, 1, (700, 2600)), - "EXTCODECOPY": (0x3C, 4, 0, (700, 2600)), + "EXTCODESIZE": (0x3B, 1, 1, 2600), + "EXTCODECOPY": (0x3C, 4, 0, 2600), "RETURNDATASIZE": (0x3D, 0, 1, 2), "RETURNDATACOPY": (0x3E, 3, 0, 3), - "EXTCODEHASH": (0x3F, 1, 1, (700, 2600)), + "EXTCODEHASH": (0x3F, 1, 1, 2600), "BLOCKHASH": (0x40, 1, 1, 20), "COINBASE": (0x41, 0, 1, 2), "TIMESTAMP": (0x42, 0, 1, 2), @@ -76,12 +76,12 @@ "GASLIMIT": (0x45, 0, 1, 2), "CHAINID": (0x46, 0, 1, 2), "SELFBALANCE": (0x47, 0, 1, 5), - "BASEFEE": (0x48, 0, 1, (None, 2)), + "BASEFEE": (0x48, 0, 1, 2), "POP": (0x50, 1, 0, 2), "MLOAD": (0x51, 1, 1, 3), "MSTORE": (0x52, 2, 0, 3), "MSTORE8": (0x53, 2, 0, 3), - "SLOAD": (0x54, 1, 1, (800, 2100)), + "SLOAD": (0x54, 1, 1, 2100), "SSTORE": (0x55, 2, 0, 20000), "JUMP": (0x56, 1, 0, 8), "JUMPI": (0x57, 2, 0, 10), @@ -89,7 +89,7 @@ "MSIZE": (0x59, 0, 1, 2), "GAS": (0x5A, 0, 1, 2), "JUMPDEST": (0x5B, 0, 0, 1), - "MCOPY": (0x5E, 3, 0, (None, None, None, None, None, 3)), + "MCOPY": (0x5E, 3, 0, (None, None, None, 3)), "PUSH0": (0x5F, 0, 1, 2), "PUSH1": (0x60, 0, 1, 3), "PUSH2": (0x61, 0, 1, 3), @@ -161,19 +161,19 @@ "LOG3": (0xA3, 5, 0, 1500), "LOG4": (0xA4, 6, 0, 1875), "CREATE": (0xF0, 3, 1, 32000), - "CALL": (0xF1, 7, 1, (700, 2100)), - "CALLCODE": (0xF2, 7, 1, (700, 2100)), + "CALL": (0xF1, 7, 1, 2100), + "CALLCODE": (0xF2, 7, 1, 2100), "RETURN": (0xF3, 2, 0, 0), - "DELEGATECALL": (0xF4, 6, 1, (700, 2100)), + "DELEGATECALL": (0xF4, 6, 1, 2100), "CREATE2": (0xF5, 4, 1, 32000), "SELFDESTRUCT": (0xFF, 1, 0, 25000), - "STATICCALL": (0xFA, 6, 1, (700, 2100)), + "STATICCALL": (0xFA, 6, 1, 2100), "REVERT": (0xFD, 2, 0, 0), "INVALID": (0xFE, 0, 0, 0), "DEBUG": (0xA5, 1, 0, 0), "BREAKPOINT": (0xA6, 0, 0, 0), - "TLOAD": (0x5C, 1, 1, (None, None, None, None, None, 100)), - "TSTORE": (0x5D, 2, 0, (None, None, None, None, None, 100)), + "TLOAD": (0x5C, 1, 1, (None, None, None, 100)), + "TSTORE": (0x5D, 2, 0, (None, None, None, 100)), } PSEUDO_OPCODES: OpcodeMap = {