diff --git a/.travis.yml b/.travis.yml
index f47d82c..62d970f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,22 +1,26 @@
 language: python
 sudo: true
 dist: xenial
+env:
+- BBLFSHD_VERSION=v2.9.1 BBLFSH_PYTHON_VERSION=v2.3.0
 services:
 - docker
 cache:
   directories:
   - $HOME/.cache/pip
 python:
-  - "3.5"
   - "3.6"
   - "3.7"
 install:
+  - docker run --privileged -d -p 9432:9432 --name bblfshd bblfsh/bblfshd:$BBLFSHD_VERSION
+  - docker exec bblfshd bblfshctl driver install bblfsh/python-driver:$BBLFSH_PYTHON_VERSION
   - wget https://github.com/bblfsh/client-python/releases/download/v2.2.1/protobuf-python_3.4.1-1_amd64.deb
   - sudo dpkg -i protobuf-python_3.4.1-1_amd64.deb
   - pip3 install --upgrade pip
   - pip3 install -r requirements.txt
   - python3 setup.py --getdeps --log
   - pip3 install . --upgrade
+  - cd bblfsh && python3 -m unittest discover
   - if [[ -z "$TRAVIS_TAG" ]]; then exit 0; fi
   - if [[ $TRAVIS_PYTHON_VERSION != '3.6' ]]; then exit 0; fi # disable double uploads to pypi
   - echo "[distutils]" > .pypirc
@@ -28,6 +32,5 @@ install:
   - HOME=. python setup.py sdist upload
 script:
   - python3 setup.py build_ext -i
-  - python3 -m unittest discover .
 notifications:
   email: false
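The CI job above provisions a `bblfshd` container and installs the Python driver before running the tests. For local development the same environment can be reproduced with `docker-py`, which this repository already uses in `bblfsh/launcher.py` and in the tests. A minimal sketch, where the image tags, port and container name mirror the values in `.travis.yml` and everything else is an assumption rather than project API:

```python
# Sketch: reproduce the CI bblfshd setup locally with docker-py.
import docker

BBLFSHD_VERSION = "v2.9.1"
BBLFSH_PYTHON_VERSION = "v2.3.0"

client = docker.from_env(version="auto")

# Start the daemon, privileged and listening on the default gRPC port 9432.
container = client.containers.run(
    "bblfsh/bblfshd:" + BBLFSHD_VERSION,
    name="bblfshd",
    detach=True,
    privileged=True,
    ports={9432: 9432},
)

# Install the Python driver inside the running daemon, as the CI job does.
container.exec_run(
    "bblfshctl driver install bblfsh/python-driver:" + BBLFSH_PYTHON_VERSION)

client.api.close()
```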
diff --git a/MANIFEST.in b/MANIFEST.in
index e8ec892..9747329 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -7,5 +7,5 @@ include Makefile
 include github.com/gogo/protobuf/gogoproto/gogo.proto
 include gopkg.in/bblfsh/sdk.v1/protocol/generated.proto
 include gopkg.in/bblfsh/sdk.v1/uast/generated.proto
-include bblfsh/memtracker.h
+include bblfsh/libuast/libuast.hpp
 prune bblfsh/libuast
diff --git a/README.md b/README.md
index 8d9513d..af29b5b 100644
--- a/README.md
+++ b/README.md
@@ -18,16 +18,18 @@ pip install bblfsh
 ```bash
 git clone https://github.com/bblfsh/client-python.git
 cd client-python
+pip install -r requirements.txt
+python setup.py --getdeps
 python setup.py install
+# or: pip install .
 ```
 
 ### Dependencies
 
-You need to install `libxml2` and its header files. You also will need a `curl` cli tool to dowload `libuast`, and a `g++` for building [libtuast Python bindings](https://github.com/bblfsh/client-python/blob/0037d762563ab49b3daac8a7577f7103a5628fc6/setup.py#L17).
+You will also need the `curl` CLI tool to download `libuast`, and `g++` to build the [libuast Python bindings](https://github.com/bblfsh/client-python/blob/0037d762563ab49b3daac8a7577f7103a5628fc6/setup.py#L17).
 
 The command for Debian and derived distributions would be:
 
 ```bash
-sudo apt install libxml2-dev
 sudo apt install curl
 sudo apt install build-essential
 ```
@@ -49,21 +51,49 @@ Please, read the [getting started](https://doc.bblf.sh/using-babelfish/getting-s
 import bblfsh
 
 client = bblfsh.BblfshClient("0.0.0.0:9432")
-uast = client.parse("/path/to/file.py").uast
-print(uast)
-# "filter' allows you to use XPath queries to filter on result nodes:
-print(bblfsh.filter(uast, "//Import[@roleImport and @roleDeclaration]//alias"))
-
-# filter\_[bool|string|number] must be used when using XPath functions returning
-# these types:
-print(bblfsh.filter_bool(uast, "boolean(//*[@strtOffset or @endOffset])"))
-print(bblfsh.filter_string(uast, "name(//*[1])"))
-print(bblfsh.filter_number(uast, "count(//*)"))
+ctx = client.parse("/path/to/file.py")
+print(ctx)
+# or to get the results in a dictionary:
+resdict = ctx.get_all()
 
-# You can also iterate on several tree iteration orders:
-it = bblfsh.iterator(uast, bblfsh.TreeOrder.PRE_ORDER)
+# "filter" allows you to use XPath queries to filter on result nodes:
+it = ctx.filter("//python:Call")
 for node in it:
-    print(node.internal_type)
+    print(node)
+    # or:
+    doSomething(node.get())
+
+# XPath queries can return different kinds of values (dicts, ints, floats,
+# bools or strings). Calling get() on an item returns the right type
+# automatically, but if you need to be sure that you got the expected type
+# (and catch bad queries early), there are
+# alternative typed versions:
+x = next(ctx.filter("boolean(//*[@startOffset or @endOffset])")).get_bool()
+y = next(ctx.filter("name(//*[1])")).get_str()
+z = next(ctx.filter("count(//*)")).get_int()  # or get_float()
+
+# You can also iterate using an order different from the default pre-order,
+# using the `iterate` method on `parse` results or on node objects:
+
+# Directly over parse results
+it = client.parse("/path/to/file.py").iterate(bblfsh.TreeOrder.POST_ORDER)
+for i in it: ...
+
+# Over filter results (which by default are already iterators with PRE_ORDER):
+ctx = client.parse("file.py")
+newiter = ctx.filter("//python:Call").iterate(bblfsh.TreeOrder.LEVEL_ORDER)
+for i in newiter: ...
+
+# Over individual node objects to change the iteration order of
+# a specific subtree:
+ctx = client.parse("file.py")
+first_node = next(ctx)
+newiter = first_node.iterate(bblfsh.TreeOrder.POSITION_ORDER)
+for i in newiter: ...
+
+# You can also get the non-semantic UAST or native AST:
+ctx = client.parse("file.py", mode=bblfsh.Modes.NATIVE)
+# Possible values for Modes: DEFAULT_MODE, NATIVE, PREPROCESSED, ANNOTATED, SEMANTIC
 ```
 
 Please read the [Babelfish clients](https://doc.bblf.sh/using-babelfish/clients.html)
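The README examples above introduce `parse`, `filter`, the typed `get_*` accessors, `iterate` and the `Modes` constants one at a time; a short end-to-end sketch may help tie them together. It assumes a `bblfshd` instance with the Python driver on the default endpoint; the file path is a placeholder, and the first XPath query is the one used by the test suite.

```python
# End-to-end sketch using only the calls shown in the README above.
import bblfsh

client = bblfsh.BblfshClient("0.0.0.0:9432")
ctx = client.parse("/path/to/file.py")

# Names of the imported modules (same query as bblfsh/fixtures/test.py).
imports = [n.get_str() for n in ctx.filter(
    "//uast:RuntimeImport/Path/uast:Alias/Name/uast:Identifier/Name")]
print("imports:", imports)

# Number of call sites in the file.
print("calls:", sum(1 for _ in ctx.filter("//python:Call")))

# Walk the whole tree in post-order, counting every node.
print("nodes:", sum(1 for _ in ctx.iterate(bblfsh.TreeOrder.POST_ORDER)))

# The same file at a lower transformation level (native AST), as a dictionary.
native = client.parse("/path/to/file.py", mode=bblfsh.Modes.NATIVE)
print(sorted(native.get_all().keys()))

client.close()
```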
diff --git a/bblfsh/__init__.py b/bblfsh/__init__.py
index f18f524..c7e4274 100644
--- a/bblfsh/__init__.py
+++ b/bblfsh/__init__.py
@@ -1,33 +1,26 @@
 from bblfsh.client import BblfshClient
-from bblfsh.pyuast import filter, filter_bool, filter_number, filter_string, iterator
+from bblfsh.pyuast import decode, iterator, uast
+from bblfsh.tree_order import TreeOrder
 from bblfsh.aliases import *
 
-class TreeOrder:
-    PRE_ORDER = 0
-    POST_ORDER = 1
-    LEVEL_ORDER = 2
-    POSITION_ORDER = 3
-
-# "in" is a reserved keyword in Python thus can't be used as package name, so
-# we import by string
 
 class RoleSearchException(Exception):
     pass
 
 
-def role_id(role_name: str) -> int:
+def role_id(rname: str) -> int:
     try:
-        name = DESCRIPTOR.enum_types_by_name["Role"].values_by_name[role_name].number
+        name = DESCRIPTOR.enum_types_by_name["Role"].values_by_name[rname].number
     except KeyError:
-        raise RoleSearchException("Role with name '{}' not found".format(role_name))
+        raise RoleSearchException("Role with name '{}' not found".format(rname))
 
     return name
 
 
-def role_name(role_id: int) -> str:
+def role_name(rid: int) -> str:
     try:
-        id_ = DESCRIPTOR.enum_types_by_name["Role"].values_by_number[role_id].name
+        id_ = DESCRIPTOR.enum_types_by_name["Role"].values_by_number[rid].name
     except KeyError:
-        raise RoleSearchException("Role with ID '{}' not found".format(role_id))
+        raise RoleSearchException("Role with ID '{}' not found".format(rid))
 
     return id_
diff --git a/bblfsh/__main__.py b/bblfsh/__main__.py
index d8951dd..7467b16 100644
--- a/bblfsh/__main__.py
+++ b/bblfsh/__main__.py
@@ -1,69 +1,62 @@
 import argparse
+import pprint
 import sys
 
-import bblfsh
-from bblfsh.pyuast import filter
-
 from bblfsh.client import BblfshClient
 from bblfsh.launcher import ensure_bblfsh_is_running
 
 
-def setup():
+def setup() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Query for a UAST to Babelfish and dump it to stdout."
     )
     parser.add_argument("-e", "--endpoint", default="0.0.0.0:9432",
-                        help="bblfsh gRPC endpoint.")
+                        help="bblfsh gRPC endpoint.", type=str)
     parser.add_argument("-f", "--file", required=True,
-                        help="File to parse.")
+                        help="File to parse.", type=str)
     parser.add_argument("-l", "--language", default=None,
-                        help="File's language. The default is to autodetect.")
+                        help="File's language. 
The default is to autodetect.", type=str) parser.add_argument("--disable-bblfsh-autorun", action="store_true", help="Do not automatically launch Babelfish server " "if it is not running.") - parser.add_argument("-q", "--query", default="", help="xpath query") - parser.add_argument("-m", "--mapn", default="", help="transform function of the results (n)") - parser.add_argument("-a", "--array", help='print results as an array', action='store_true') + parser.add_argument("-q", "--query", default="", help="xpath query", type=str) + parser.add_argument("-a", "--array", help='print results as a parseable Python array', action='store_true') - args = parser.parse_args() - return args + return parser.parse_args() -def run_query(root: bblfsh.Node, query: str, mapn: str, as_array: bool) -> None: - result = list(filter(root, query)) - if not result: +def run_query(uast, query: str, array: bool) -> None: + result_iter = uast.filter(query) + if not result_iter: print("Nothing found") - else: - if mapn: - result = [eval(mapn) for n in result] + result_list = [x.load() for x in result_iter] - if as_array: - print("results[{}] = {}".format(len(result), result)) - else: - print("Running xpath query: {}".format(query)) - print("FOUND {} roots".format(len(result))) + if array: + pprint.pprint(result_list) + else: + print("%d Results:" % len(result_list)) + for i, node in enumerate(result_list): + print("== {} ==================================".format(i+1)) + print(node) - for i, node in enumerate(result): - print("== {} ==================================".format(i+1)) - print(node) -def main(): +def main() -> int: args = setup() if not args.disable_bblfsh_autorun: ensure_bblfsh_is_running() client = BblfshClient(args.endpoint) - response = client.parse(args.file, args.language) - root = response.uast - if len(response.errors): - sys.stderr.write("\n".join(response.errors) + "\n") - query = args.query - if query: - run_query(root, query, args.mapn, args.array) + ctx = client.parse(args.file, args.language) + + if args.query: + run_query(ctx, args.query, array=args.array) else: - print(root) + pprint.pprint(ctx.load()) + + return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 82c2057..c516b00 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -1,27 +1,39 @@ -__all__ = ["DESCRIPTOR", "Node", "Position", "ParseResponse", "NativeParseResponse", - "ParseRequest", "NativeParseRequest", "VersionRequest", "ProtocolServiceStub"] - import importlib - -from bblfsh.sdkversion import VERSION +import google # "in" is a reserved keyword in Python thus can't be used as package name, so # we import by string -uast_module = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.uast.generated_pb2" % VERSION) -protocol_module = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION) -protocol_grpc_module = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION) +uast_v2_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2") +protocol_v2_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2") +protocol_grpc_v2_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc") +protocol_v1_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2") +protocol_grpc_v1_module = importlib.import_module( + 
"bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc") + +DESCRIPTOR = uast_v2_module.DESCRIPTOR +ParseRequest = protocol_v2_module.ParseRequest +ParseResponse = protocol_v2_module.ParseResponse +ParseError = protocol_v2_module.ParseError +Mode = protocol_v2_module.Mode +ModeType = google.protobuf.internal.enum_type_wrapper.EnumTypeWrapper + + +class Modes: + pass + +# Current values: {'DEFAULT_MODE': 0, 'NATIVE': 1, 'PREPROCESSED': 2, 'ANNOTATED': 4, 'SEMANTIC': 8} +for k, v in Mode.DESCRIPTOR.values_by_name.items(): + setattr(Modes, k, v.number) + +DriverStub = protocol_grpc_v2_module.DriverStub +DriverServicer = protocol_grpc_v2_module.DriverServicer -DESCRIPTOR = uast_module.DESCRIPTOR -Node = uast_module.Node -Position = uast_module.Position -ParseResponse = protocol_module.ParseResponse -NativeParseResponse = protocol_module.NativeParseResponse -ParseRequest = protocol_module.ParseRequest -NativeParseRequest = protocol_module.NativeParseRequest -VersionRequest = protocol_module.VersionRequest -SupportedLanguagesRequest = protocol_module.SupportedLanguagesRequest -SupportedLanguagesResponse = protocol_module.SupportedLanguagesResponse -ProtocolServiceStub = protocol_grpc_module.ProtocolServiceStub +VersionRequest = protocol_v1_module.VersionRequest +VersionResponse = protocol_v1_module.VersionResponse +SupportedLanguagesRequest = protocol_v1_module.SupportedLanguagesRequest +SupportedLanguagesResponse = protocol_v1_module.SupportedLanguagesResponse +ProtocolServiceStub = protocol_grpc_v1_module.ProtocolServiceStub diff --git a/bblfsh/client.py b/bblfsh/client.py index aae2922..fc975c3 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -1,54 +1,61 @@ import os -import sys +from typing import Optional, Union, List import grpc -from bblfsh.aliases import (ParseRequest, ParseResponse, NativeParseRequest, NativeParseResponse, - VersionRequest, ProtocolServiceStub, SupportedLanguagesRequest, SupportedLanguagesResponse) -from bblfsh.sdkversion import VERSION - -# The following two insertions fix the broken pb import paths -sys.path.insert(0, os.path.join(os.path.dirname(__file__), - "gopkg/in/bblfsh/sdk/%s/protocol" % VERSION)) -sys.path.insert(0, os.path.dirname(__file__)) +from bblfsh.aliases import (ParseRequest, DriverStub, ProtocolServiceStub, + VersionRequest, SupportedLanguagesRequest, ModeType, + VersionResponse) +from bblfsh.result_context import ResultContext class NonUTF8ContentException(Exception): pass -class BblfshClient(object): +class BblfshClient: """ - Babelfish gRPC client. Currently it is only capable of fetching UASTs. + Babelfish gRPC client. """ - def __init__(self, endpoint: str): + def __init__(self, endpoint: Union[str, grpc.Channel]) -> None: """ Initializes a new instance of BblfshClient. 
        :param endpoint: The address of the Babelfish server, \
                         for example "0.0.0.0:9432"
+        :type endpoint: str
         """
-        self._channel = grpc.insecure_channel(endpoint)
-        self._stub = ProtocolServiceStub(self._channel)
+
+        if isinstance(endpoint, str):
+            self._channel = grpc.insecure_channel(endpoint)
+        else:
+            self._channel = endpoint
+
+        self._stub_v1 = ProtocolServiceStub(self._channel)
+        self._stub_v2 = DriverStub(self._channel)
 
     @staticmethod
-    def _check_utf8(text: str) -> None:
+    def _ensure_utf8(text: bytes) -> str:
         try:
-            text.decode("utf-8")
+            return text.decode("utf-8")
         except UnicodeDecodeError:
             raise NonUTF8ContentException("Content must be UTF-8, ASCII or Base64 encoded")
 
     @staticmethod
-    def _get_contents(contents: str, filename: str) -> str:
+    def _get_contents(contents: Optional[Union[str, bytes]], filename: str) -> str:
         if contents is None:
             with open(filename, "rb") as fin:
                 contents = fin.read()
-        BblfshClient._check_utf8(contents)
+
+        if isinstance(contents, bytes):
+            contents = BblfshClient._ensure_utf8(contents)
+
         return contents
 
-    def parse(self, filename: str, language: str=None, contents: str=None,
-              timeout: float=None) -> ParseResponse:
+    def parse(self, filename: str, language: Optional[str]=None,
+              contents: Optional[str]=None, mode: Optional[ModeType]=None,
+              timeout: Optional[float]=None) -> ResultContext:
         """
         Queries the Babelfish server and receives the UAST response for the specified
         file.
@@ -60,54 +67,39 @@ def parse(self, filename: str, language: str=None, contents: str=None,
                          currently supported languages. None means autodetect.
         :param contents: The contents of the file. IF None, it is read from \
                          filename.
+        :param mode: UAST transformation mode; one of the values in Modes
+                     (DEFAULT_MODE, NATIVE, PREPROCESSED, ANNOTATED, SEMANTIC).
         :param timeout: The request timeout in seconds.
+        :type filename: str
+        :type language: str
+        :type contents: str
+        :type timeout: float
         :return: UAST object.
         """
 
+        # TODO: handle syntax errors
         contents = self._get_contents(contents, filename)
         request = ParseRequest(filename=os.path.basename(filename),
-                               content=contents,
+                               content=contents, mode=mode,
                                language=self._scramble_language(language))
-        return self._stub.Parse(request, timeout=timeout)
-
-    def native_parse(self, filename: str, language: str=None, contents: str=None,
-                     timeout: float=None) -> NativeParseResponse:
-        """
-        Queries the Babelfish server and receives the native AST response for the specified
-        file.
+        response = self._stub_v2.Parse(request, timeout=timeout)
+        return ResultContext(response)
 
-        :param filename: The path to the file. Can be arbitrary if contents \
-                         is not None.
-        :param language: The programming language of the file. Refer to \
-                         https://doc.bblf.sh/languages.html for the list of \
-                         currently supported languages. None means autodetect.
-        :param contents: The contents of the file. IF None, it is read from \
-                         filename.
-        :param timeout: The request timeout in seconds.
-        :return: Native AST object.
- """ - - contents = self._get_contents(contents, filename) - request = NativeParseRequest(filename=os.path.basename(filename), - content=contents, - language=self._scramble_language(language)) - return self._stub.NativeParse(request, timeout=timeout) - - def supported_languages(self): - sup_response = self._stub.SupportedLanguages(SupportedLanguagesRequest()) + def supported_languages(self) -> List[str]: + sup_response = self._stub_v1.SupportedLanguages(SupportedLanguagesRequest()) return sup_response.languages - def version(self): + def version(self) -> VersionResponse: """ Queries the Babelfish server for version and runtime information. :return: A dictionary with the keys "version" for the semantic version and "build" for the build timestamp. """ - return self._stub.Version(VersionRequest()) + return self._stub_v1.Version(VersionRequest()) @staticmethod - def _scramble_language(lang: str) -> str: + def _scramble_language(lang: Optional[str]) -> Optional[str]: if lang is None: return None lang = lang.lower() @@ -115,3 +107,11 @@ def _scramble_language(lang: str) -> str: lang = lang.replace("+", "p") lang = lang.replace("#", "sharp") return lang + + def close(self) -> None: + """ + Close the gRPC channel and free the acquired resources. Using a closed client is + not supported. + """ + self._channel.close() + self._channel = self._stub_v1 = self._stub_v2 = None diff --git a/bblfsh/fixtures/test.py b/bblfsh/fixtures/test.py new file mode 100644 index 0000000..943c193 --- /dev/null +++ b/bblfsh/fixtures/test.py @@ -0,0 +1,322 @@ +import os +import resource +import unittest + +import docker + +from bblfsh import (BblfshClient, iterator, role_id, + role_name, ParseResponse, TreeOrder) +from bblfsh.launcher import ensure_bblfsh_is_running +from bblfsh.client import NonUTF8ContentException +from bblfsh.result_context import Node, NodeIterator + + +class BblfshTests(unittest.TestCase): + BBLFSH_SERVER_EXISTED = None + + @classmethod + def setUpClass(cls): + cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running() + + @classmethod + def tearDownClass(cls): + if not cls.BBLFSH_SERVER_EXISTED: + client = docker.from_env(version="auto") + client.containers.get("bblfshd").remove(force=True) + client.api.close() + + def setUp(self): + self.client = BblfshClient("0.0.0.0:9432") + + def testVersion(self): + version = self.client.version() + self.assertTrue(hasattr(version, "version")) + self.assertTrue(version.version) + self.assertTrue(hasattr(version, "build")) + self.assertTrue(version.build) + + # def testNativeParse(self): + # reply = self.client.native_parse(__file__) + # assert(reply.ast) + # + def testNonUTF8ParseError(self): + self.assertRaises(NonUTF8ContentException, + self.client.parse, "", "Python", b"a = '\x80abc'") + # + def testUASTDefaultLanguage(self): + self._validate_ctx(self.client.parse(__file__)) + + def testUASTPython(self): + ctx = self.client.parse(__file__, language="Python") + self._validate_ctx(ctx) + self.assertEqual(ctx.language, "python") + + def testUASTFileContents(self): + with open(__file__, "rb") as fin: + contents = fin.read() + ctx = self.client.parse("file.py", contents=contents) + self._validate_ctx(ctx) + self._validate_filter(ctx) + # + # def testBrokenFilter(self): + # self.assertRaises(RuntimeError, filter, 0, "foo") + # + # def testFilterInternalType(self): + # node = Node() + # node.internal_type = 'a' + # self.assertTrue(any(filter(node, "//a"))) + # self.assertFalse(any(filter(node, "//b"))) + # + # def testFilterToken(self): + # node = Node() + # 
node.token = 'a' + # self.assertTrue(any(filter(node, "//*[@token='a']"))) + # self.assertFalse(any(filter(node, "//*[@token='b']"))) + # + # def testFilterRoles(self): + # node = Node() + # node.roles.append(1) + # self.assertTrue(any(filter(node, "//*[@roleIdentifier]"))) + # self.assertFalse(any(filter(node, "//*[@roleQualified]"))) + # + # def testFilterProperties(self): + # node = Node() + # node.properties['k1'] = 'v2' + # node.properties['k2'] = 'v1' + # self.assertTrue(any(filter(node, "//*[@k2='v1']"))) + # self.assertTrue(any(filter(node, "//*[@k1='v2']"))) + # self.assertFalse(any(filter(node, "//*[@k1='v1']"))) + # + # def testFilterStartOffset(self): + # node = Node() + # node.start_position.offset = 100 + # self.assertTrue(any(filter(node, "//*[@startOffset=100]"))) + # self.assertFalse(any(filter(node, "//*[@startOffset=10]"))) + # + # def testFilterStartLine(self): + # node = Node() + # node.start_position.line = 10 + # self.assertTrue(any(filter(node, "//*[@startLine=10]"))) + # self.assertFalse(any(filter(node, "//*[@startLine=100]"))) + # + # def testFilterStartCol(self): + # node = Node() + # node.start_position.col = 50 + # self.assertTrue(any(filter(node, "//*[@startCol=50]"))) + # self.assertFalse(any(filter(node, "//*[@startCol=5]"))) + # + # def testFilterEndOffset(self): + # node = Node() + # node.end_position.offset = 100 + # self.assertTrue(any(filter(node, "//*[@endOffset=100]"))) + # self.assertFalse(any(filter(node, "//*[@endOffset=10]"))) + # + # def testFilterEndLine(self): + # node = Node() + # node.end_position.line = 10 + # self.assertTrue(any(filter(node, "//*[@endLine=10]"))) + # self.assertFalse(any(filter(node, "//*[@endLine=100]"))) + # + # def testFilterEndCol(self): + # node = Node() + # node.end_position.col = 50 + # self.assertTrue(any(filter(node, "//*[@endCol=50]"))) + # self.assertFalse(any(filter(node, "//*[@endCol=5]"))) + # + # def testFilterBool(self): + # node = Node() + # self.assertTrue(filter_bool(node, "boolean(//*[@startOffset or @endOffset])")) + # self.assertFalse(filter_bool(node, "boolean(//*[@blah])")) + # + # def testFilterNumber(self): + # node = Node() + # node.children.extend([Node(), Node(), Node()]) + # self.assertEqual(int(filter_number(node, "count(//*)")), 4) + # + # def testFilterString(self): + # node = Node() + # node.internal_type = "test" + # self.assertEqual(filter_string(node, "name(//*[1])"), "test") + # + # def testFilterBadQuery(self): + # node = Node() + # self.assertRaises(RuntimeError, filter, node, "//*roleModule") + # + # def testFilterBadType(self): + # node = Node() + # node.end_position.col = 50 + # self.assertRaises(RuntimeError, filter, node, "boolean(//*[@startPosition or @endPosition])") + # + # def testRoleIdName(self): + # self.assertEqual(role_id(role_name(1)), 1) + # self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") + # + # def _itTestTree(self): + # root = Node() + # root.internal_type = 'root' + # root.start_position.offset = 0 + # root.start_position.line = 0 + # root.start_position.col = 1 + # + # son1 = Node() + # son1.internal_type = 'son1' + # son1.start_position.offset = 1 + # + # son1_1 = Node() + # son1_1.internal_type = 'son1_1' + # son1_1.start_position.offset = 10 + # + # son1_2 = Node() + # son1_2.internal_type = 'son1_2' + # son1_2.start_position.offset = 10 + # + # son1.children.extend([son1_1, son1_2]) + # + # son2 = Node() + # son2.internal_type = 'son2' + # son2.start_position.offset = 100 + # + # son2_1 = Node() + # son2_1.internal_type = 'son2_1' + # 
son2_1.start_position.offset = 5 + # + # son2_2 = Node() + # son2_2.internal_type = 'son2_2' + # son2_2.start_position.offset = 15 + # + # son2.children.extend([son2_1, son2_2]) + # root.children.extend([son1, son2]) + # + # return root + # + # def testIteratorPreOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.PRE_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['root', 'son1', 'son1_1', 'son1_2', + # 'son2', 'son2_1', 'son2_2']) + # + # def testIteratorPostOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.POST_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1', + # 'son2_2', 'son2', 'root']) + # + # def testIteratorLevelOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.LEVEL_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['root', 'son1', 'son2', 'son1_1', + # 'son1_2', 'son2_1', 'son2_2']) + # + # def testIteratorPositionOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.POSITION_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1', + # 'son1_2', 'son2_2', 'son2']) + # + def _validate_ctx(self, ctx): + import bblfsh + self.assertIsNotNone(ctx) + self.assertIsInstance(ctx, bblfsh.result_context.ResultContext) + self.assertIsInstance(ctx.uast, bytes) + + # def testFilterInsideIter(self): + # root = self.client.parse(__file__).uast + # it = iterator(root, TreeOrder.PRE_ORDER) + # self.assertIsNotNone(it) + # for n in it: + # filter(n, "//*[@roleIdentifier]") + # + # def testItersMixingIterations(self): + # root = self.client.parse(__file__).uast + # it = iterator(root, TreeOrder.PRE_ORDER) + # next(it); next(it); next(it) + # n = next(it) + # it2 = iterator(n, TreeOrder.PRE_ORDER) + # next(it2) + # assert(next(it) == next(it2)) + # + # def testManyFilters(self): + # root = self.client.parse(__file__).uast + # root.properties['k1'] = 'v2' + # root.properties['k2'] = 'v1' + # + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(500): + # filter(root, "//*[@roleIdentifier]") + # + # after = resource.getrusage(resource.RUSAGE_SELF) + # + # # Check that memory usage has not doubled after running the filter + # self.assertLess(after[2] / before[2], 2.0) + # + # def testManyParses(self): + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(100): + # root = self.client.parse(__file__).uast + # root.properties['k1'] = 'v2' + # root.properties['k2'] = 'v1' + # + # after = resource.getrusage(resource.RUSAGE_SELF) + # + # # Check that memory usage has not doubled after running the parse+filter + # self.assertLess(after[2] / before[2], 2.0) + # + # def testManyParsersAndFilters(self): + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(100): + # root = self.client.parse(__file__).uast + # root.properties['k1'] = 'v2' + # root.properties['k2'] = 'v1' + # + # filter(root, "//*[@roleIdentifier]") + # + # after = resource.getrusage(resource.RUSAGE_SELF) + # + # # Check that memory usage has not doubled after running the parse+filter + # self.assertLess(after[2] / before[2], 2.0) + # + # def testSupportedLanguages(self): + # res = self.client.supported_languages() + # 
self.assertGreater(len(res), 0) + # for l in res: + # for key in ('language', 'version', 'status', 'features'): + # print(key) + # self.assertTrue(hasattr(l, key)) + # self.assertIsNotNone(getattr(l, key)) + + def _validate_filter(self, ctx): + def assert_strnode(n: Node, expected: str) -> None: + self.assertEqual(n.get(), expected) + self.assertIsInstance(n.get_str(), str) + self.assertEqual(n.get_str(), expected) + + # print(ctx) + it = ctx.filter("//uast:RuntimeImport/Path/uast:Alias/Name/uast:Identifier/Name") + self.assertIsInstance(it, NodeIterator) + # wtf = next(it) + # print(type(wtf)) + # print(wtf) + + assert_strnode(next(it), "os") + assert_strnode(next(it), "resource") + assert_strnode(next(it), "unittest") + assert_strnode(next(it), "docker") + assert_strnode(next(it), "bblfsh") + self.assertRaises(StopIteration, next(it)) + + # self.assertEqual(next(results).token, "0") + # self.assertEqual(next(results).token, "1") + # self.assertEqual(next(results).token, "100") + # self.assertEqual(next(results).token, "10") + + +if __name__ == "__main__": + unittest.main() diff --git a/bblfsh/launcher.py b/bblfsh/launcher.py index 9dec012..c4d1f3b 100644 --- a/bblfsh/launcher.py +++ b/bblfsh/launcher.py @@ -5,29 +5,26 @@ import docker +def after_container_start(cont: docker.models.resource.Model, log: logging.Logger) -> None: + log.warning( + "Launched the Babelfish server (name bblfshd, id %s).\nStop it " + "with: docker rm -f bblfshd", cont.id) + + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + result = -1 + while result != 0: + time.sleep(0.1) + result = sock.connect_ex(("0.0.0.0", 9432)) + + log.warning("Babelfish server is up and running.") + log.info("Installing Python driver") + cont.exec_run("bblfshctl driver install python bblfsh/python-driver:latest") + + def ensure_bblfsh_is_running() -> bool: log = logging.getLogger("bblfsh") try: client = docker.from_env(version="auto") - except docker.errors.DockerException as e: - log.warning("Failed to connect to the Docker daemon and ensure " - "that the Babelfish server is running. %s", e) - return False - - def after_start(container): - log.warning( - "Launched the Babelfish server (name bblfshd, id %s).\nStop it " - "with: docker rm -f bblfshd", container.id) - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: - result = -1 - while result != 0: - time.sleep(0.1) - result = sock.connect_ex(("0.0.0.0", 9432)) - log.warning("Babelfish server is up and running.") - log.info("Installing Python driver") - container.exec_run("bblfshctl driver install python bblfsh/python-driver:latest") - - try: container = client.containers.get("bblfshd") if container.status != "running": try: @@ -36,20 +33,28 @@ def after_start(container): log.warning("Failed to start the existing bblfshd container: " "%s: %s", type(e).__name__, e) else: - after_start(container) + after_container_start(container, log) return False - return True + except docker.errors.DockerException as e: + log.warning("Failed to connect to the Docker daemon and ensure " + "that the Babelfish server is running. 
%s", e) + return False + except AttributeError: log.error("You hit https://github.com/docker/docker-py/issues/1353\n" "Uninstall docker-py and docker and install *only* docker.\n" "Failed to ensure that the Babelfish server is running.") return False + except docker.errors.NotFound: container = client.containers.run( - "bblfsh/bblfshd", name="bblfshd", detach=True, privileged=True, + "bblfsh/bblfshd", name="bblfshd", detach=True, privileged=True, ports={9432: 9432} ) - after_start(container) + after_container_start(container, log) return False + finally: client.api.close() + + return True diff --git a/bblfsh/memtracker.cc b/bblfsh/memtracker.cc deleted file mode 100644 index d7322d4..0000000 --- a/bblfsh/memtracker.cc +++ /dev/null @@ -1,35 +0,0 @@ -#include "memtracker.h" - -UastIterator* MemTracker::CurrentIterator() { return currentIter_; } -void MemTracker::ClearCurrentIterator() { currentIter_ = nullptr; } -void MemTracker::EnterFilter() { inFilter_ = true; } -void MemTracker::ExitFilter() { inFilter_ = false; } -bool MemTracker::CurrentIteratorSet() { return currentIter_ != nullptr; } -void MemTracker::SetCurrentIterator(UastIterator *iter) { currentIter_ = iter; } - -void MemTracker::TrackItem(PyObject *o) -{ - if (inFilter_) { - filterItemAllocs_.push_back(o); - } else { - iterItemAllocs_[currentIter_].push_back(o); - } -} - -void MemTracker::DisposeMem() -{ - if (inFilter_) { - for (auto &i : filterItemAllocs_) { - Py_CLEAR(i); - } - filterItemAllocs_.clear(); - filterItemAllocs_.shrink_to_fit(); - } else { - for (auto &i : iterItemAllocs_[currentIter_]) { - Py_CLEAR(i); - } - iterItemAllocs_[currentIter_].clear(); - iterItemAllocs_.erase(currentIter_); - ClearCurrentIterator(); - } -} diff --git a/bblfsh/memtracker.h b/bblfsh/memtracker.h deleted file mode 100644 index a8994e1..0000000 --- a/bblfsh/memtracker.h +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include - -#include "uast.h" - -#include - -class MemTracker { -private: - UastIterator *currentIter_ = nullptr; - bool inFilter_ = false; - - std::unordered_map> iterItemAllocs_; - std::vector filterItemAllocs_; - -public: - UastIterator *CurrentIterator(); - void SetCurrentIterator(UastIterator *iter); - bool CurrentIteratorSet(); - void ClearCurrentIterator(); - void EnterFilter(); - void ExitFilter(); - void TrackItem(PyObject *ref); - void DisposeMem(); -}; diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 83072c1..ee0da1a 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -1,272 +1,739 @@ #include #include #include +#include #include +#include -#include "uast.h" -#include "memtracker.h" - - +#include "libuast.hpp" // Used to store references to the Pyobjects instanced in String() and // ItemAt() methods. Those can't be DECREF'ed to 0 because libuast uses them // so we pass ownership to these lists and free them at the end of filter() -MemTracker memTracker; -// WARNING: calls to Attribute MUST Py_DECREF the returned value once -// used (or add it to the memtracker) -static PyObject *Attribute(const void *node, const char *prop) { - PyObject *n = (PyObject *)node; - return PyObject_GetAttrString(n, prop); -} +PyObject* asPyBuffer(uast::Buffer buf) { + PyObject* arr = PyByteArray_FromStringAndSize((const char*)(buf.ptr), buf.size); + free(buf.ptr); + return arr; -// WARNING: calls to AttributeValue MUST Py_DECREF the returned value once -// used (or add it to the memtracker) -static PyObject *AttributeValue(const void *node, const char *prop) { - PyObject *a = Attribute(node, prop); - return a && a != Py_None ? 
a : NULL; + // TODO: this is an alternative way of exposing the data; check which one is faster + //return PyMemoryView_FromMemory((char*)(buf.ptr), buf.size, PyBUF_READ); } -static bool HasAttribute(const void *node, const char *prop) { - PyObject *o = AttributeValue(node, prop); - if (o == NULL) { - return false; - } +bool isContext(PyObject* obj); - Py_DECREF(o); - return true; +bool assertNotContext(PyObject* obj) { + if (isContext(obj)) { + PyErr_SetString(PyExc_RuntimeError, "cannot use uast context as a node"); + return false; + } + return true; } -static const char *String(const void *node, const char *prop) { - const char *retval = NULL; - PyObject *o = Attribute(node, prop); - if (o != NULL) { - retval = PyUnicode_AsUTF8(o); - memTracker.TrackItem(o); - } - return retval; -} +// ========================================== +// External UAST Node (managed by libuast) +// ========================================== -static size_t Size(const void *node, const char *prop) { - size_t retval = 0; - PyObject *o = Attribute(node, prop); - if (o != NULL) { - retval = PySequence_Size(o); - Py_DECREF(o); - } +class ContextExt; - return retval; -} +typedef struct { + PyObject_HEAD + ContextExt *ctx; + NodeHandle handle; +} PyNodeExt; -static PyObject *ItemAt(PyObject *object, int index) { - PyObject *retval = NULL; - PyObject *seq = PySequence_Fast(object, "expected a sequence"); - if (seq != NULL) { - retval = PyList_GET_ITEM(seq, index); - memTracker.TrackItem(seq); - } +static PyObject *PyNodeExt_load(PyNodeExt *self, PyObject *Py_UNUSED(ignored)); - return retval; -} +static PyMethodDef PyNodeExt_methods[] = { + {"load", (PyCFunction) PyNodeExt_load, METH_NOARGS, + "Load external node to Python" + }, + {nullptr} // Sentinel +}; -static const char *InternalType(const void *node) { - return String(node, "internal_type"); +extern "C" +{ + static PyTypeObject PyNodeExtType = { + PyVarObject_HEAD_INIT(nullptr, 0) + "pyuast.NodeExt", // tp_name + sizeof(PyNodeExt), // tp_basicsize + 0, // tp_itemsize + 0, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "External UAST node", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter: __iter()__ method + 0, // tp_iternext: next() method + PyNodeExt_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; } -static const char *Token(const void *node) { - return String(node, "token"); -} +// ========================================== +// External UAST iterator +// ========================================== -static size_t ChildrenSize(const void *node) { - return Size(node, "children"); -} +typedef struct { + PyObject_HEAD + ContextExt *ctx; + uast::Iterator *iter; + bool freeCtx; +} PyUastIterExt; -static void *ChildAt(const void *node, int index) { - PyObject *children = AttributeValue(node, "children"); - void *retval = nullptr; - if (children) { - retval = ItemAt(children, index); - Py_DECREF(children); - } +static void PyUastIterExt_dealloc(PyObject *self); - return retval; +static PyObject *PyUastIterExt_iter(PyObject *self) { + Py_INCREF(self); + return self; } 
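`IteratorExt` above implements the standard Python iterator protocol (`__iter__` returns the object itself, `__next__` advances and raises `StopIteration` once exhausted), and `NodeExt` exposes a single `load()` method that converts the external node into plain Python values. A hedged sketch of how that contract is consumed from the Python side; `ctx` stands for a `ContextExt` obtained through the higher-level client, and the query is only an illustration:

```python
# Sketch: driving an IteratorExt / NodeExt pair from Python.
it = ctx.filter("//python:Call")   # IteratorExt over external node handles

first = next(it, None)             # manual stepping; None once exhausted
if first is not None:
    print(first.load())            # materialize one node as dicts/lists/str/...

for node in it:                    # or consume the rest with a plain for loop
    print(node.load())
```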
-static size_t RolesSize(const void *node) { - return Size(node, "roles"); -} +static PyObject *PyUastIterExt_toPy(ContextExt *ctx, NodeHandle node); -static uint16_t RoleAt(const void *node, int index) { - uint16_t retval = 0; - PyObject *roles = AttributeValue(node, "roles"); - if (roles) { - retval = (uint16_t)PyLong_AsUnsignedLong(ItemAt(roles, index)); - Py_DECREF(roles); - } - return retval; -} +static PyObject *PyUastIterExt_next(PyObject *self) { + auto it = (PyUastIterExt *)self; -static size_t PropertiesSize(const void *node) { - size_t retval = 0; - PyObject *properties = AttributeValue(node, "properties"); - if (properties) { - retval = PyMapping_Size(properties); - Py_DECREF(properties); + try { + if (!it->iter->next()) { + PyErr_SetNone(PyExc_StopIteration); + return nullptr; + } + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; } - return retval; -} -static const char *PropertyKeyAt(const void *node, int index) { - PyObject *properties = AttributeValue(node, "properties"); - if (!properties || !PyMapping_Check(properties)) { - return NULL; - } + NodeHandle node = it->iter->node(); + if (node == 0) Py_RETURN_NONE; - const char *retval = NULL; - PyObject *keys = PyMapping_Keys(properties); - Py_DECREF(properties); - if (keys != NULL) { - retval = PyUnicode_AsUTF8(ItemAt(keys, index)); - Py_DECREF(keys); - } - return retval; + return PyUastIterExt_toPy(it->ctx, node); } -static const char *PropertyValueAt(const void *node, int index) { - PyObject *properties = AttributeValue(node, "properties"); - if (!properties) - return NULL; +extern "C" +{ + static PyTypeObject PyUastIterExtType = { + PyVarObject_HEAD_INIT(nullptr, 0) + "pyuast.IteratorExt", // tp_name + sizeof(PyUastIterExt), // tp_basicsize + 0, // tp_itemsize + PyUastIterExt_dealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "External UastIterator object", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + PyUastIterExt_iter, // tp_iter: __iter()__ method + (iternextfunc)PyUastIterExt_next, // tp_iternext: next() method + 0, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; +} - if (!PyMapping_Check(properties)) { - Py_DECREF(properties); - return NULL; - } +// ========================================== +// External UAST Context (managed by libuast) +// ========================================== + +class ContextExt { +private: + uast::Context *ctx; + + // toPy allocates a new PyNodeExt with a specified handle. + // Returns a new reference. + PyObject* toPy(NodeHandle node) { + if (node == 0) Py_RETURN_NONE; + + PyNodeExt *pyObj = PyObject_New(PyNodeExt, &PyNodeExtType); + if (!pyObj) return nullptr; + + pyObj->ctx = this; + pyObj->handle = node; + return (PyObject*)pyObj; + } + + // toHandle casts an object to PyNodeExt and returns its handle. + // Borrows the reference. 
+ NodeHandle toHandle(PyObject* obj) { + if (!obj || obj == Py_None) return 0; + + if (!PyObject_TypeCheck(obj, &PyNodeExtType)) { + const char* err = "unknown node type"; + PyErr_SetString(PyExc_NotImplementedError, err); + ctx->SetError(err); + return 0; + } + + auto node = (PyNodeExt*)obj; + return node->handle; + } + + PyObject* newIter(uast::Iterator *it, bool freeCtx){ + PyUastIterExt *pyIt = PyObject_New(PyUastIterExt, &PyUastIterExtType); + if (!pyIt) return nullptr; + + if (!PyObject_Init((PyObject *)pyIt, &PyUastIterExtType)) { + Py_DECREF(pyIt); + return nullptr; + } + pyIt->iter = it; + pyIt->ctx = this; + pyIt->freeCtx = freeCtx; + return (PyObject*)pyIt; + } +public: + friend class Context; + + ContextExt(uast::Context *c) : ctx(c) { + } + ~ContextExt(){ + delete(ctx); + } + + // lookup searches for a specific node handle. + // Returns a new reference. + PyObject* lookup(NodeHandle node) { + return toPy(node); + } + + // RootNode returns a root UAST node, if set. + // Returns a new reference. + PyObject* RootNode(){ + NodeHandle root = ctx->RootNode(); + return lookup(root); + } + + // Iterate iterates over an external UAST tree. + // Borrows the reference. + PyObject* Iterate(PyObject* node, TreeOrder order){ + if (!assertNotContext(node)) return nullptr; + + NodeHandle h = toHandle(node); + auto iter = ctx->Iterate(h, order); + return newIter(iter, false); + } + + // Filter queries an external UAST. + // Borrows the reference. + PyObject* Filter(PyObject* node, const char* query){ + if (!assertNotContext(node)) return nullptr; + + NodeHandle unode = toHandle(node); + if (unode == 0) unode = ctx->RootNode(); + + uast::Iterator *it = ctx->Filter(unode, query); + + return newIter(it, false); + } + + // Encode serializes the external UAST. + // Borrows the reference. + PyObject* Encode(PyObject *node, UastFormat format) { + if (!assertNotContext(node)) return nullptr; + + uast::Buffer data = ctx->Encode(toHandle(node), format); + return asPyBuffer(data); + } +}; - const char *retval = NULL; - PyObject *values = PyMapping_Values(properties); - if (values != NULL) { - retval = PyUnicode_AsUTF8(ItemAt(values, index)); - Py_DECREF(values); - } - Py_DECREF(properties); - return retval; +// PyUastIterExt_toPy is a function that looks up for nodes visited by iterator. +// Returns a new reference. +static PyObject *PyUastIterExt_toPy(ContextExt *ctx, NodeHandle node) { + return ctx->lookup(node); } -static uint32_t PositionValue(const void* node, const char *prop, const char *field) { - PyObject *position = AttributeValue(node, prop); - if (!position) { - return 0; - } +// PyUastIterExt_dealloc destroys an iterator. 
+static void PyUastIterExt_dealloc(PyObject *self) { + auto it = (PyUastIterExt *)self; + delete(it->iter); - PyObject *offset = AttributeValue(position, field); - Py_DECREF(position); - uint32_t retval = 0; + if (it->freeCtx && it->ctx) delete(it->ctx); - if (offset) { - retval = (uint32_t)PyLong_AsUnsignedLong(offset); - Py_DECREF(offset); - } - return retval; + it->freeCtx = false; + it->ctx = nullptr; + Py_TYPE(self)->tp_free(self); } -///////////////////////////////////// -/////////// Node Interface ////////// -///////////////////////////////////// - -extern "C" -{ - static bool HasStartOffset(const void *node) { - return HasAttribute(node, "start_position"); - } +typedef struct { + PyObject_HEAD + ContextExt *p; +} PythonContextExt; - static uint32_t StartOffset(const void *node) { - return PositionValue(node, "start_position", "offset"); - } +static void PythonContextExt_dealloc(PyObject *self) { + delete(((PythonContextExt *)self)->p); + Py_TYPE(self)->tp_free(self); +} - static bool HasStartLine(const void *node) { - return HasAttribute(node, "start_position"); - } +// PythonContextExt_root returns a root node associated with this context. +// Returns a new reference. +static PyObject *PythonContextExt_root(PythonContextExt *self, PyObject *Py_UNUSED(ignored)) { + return self->p->RootNode(); +} - static uint32_t StartLine(const void *node) { - return PositionValue(node, "start_position", "line"); - } +// PythonContextExt_load returns a root node converted to Python object. +// Returns a new reference. +static PyObject *PythonContextExt_load(PythonContextExt *self, PyObject *Py_UNUSED(ignored)) { + PyObject* root = PythonContextExt_root(self, nullptr); + return PyNodeExt_load((PyNodeExt*)root, nullptr); +} - static bool HasStartCol(const void *node) { - return HasAttribute(node, "start_position"); - } +// PythonContextExt_filter filters UAST. +// Returns a new reference. +static PyObject *PythonContextExt_filter(PythonContextExt *self, PyObject *args, PyObject *kwargs) { + char* kwds[] = {(char*)"query", (char*)"node", NULL}; + const char *query = nullptr; + PyObject *node = nullptr; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) + return nullptr; + + PyObject* it = nullptr; + try { + it = self->p->Filter(node, query); + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + return it; +} - static uint32_t StartCol(const void *node) { - return PositionValue(node, "start_position", "col"); - } +// PythonContextExt_encode serializes UAST. +// Returns a new reference. 
+static PyObject *PythonContextExt_encode(PythonContextExt *self, PyObject *args) { + PyObject *node = nullptr; + UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum + if (!PyArg_ParseTuple(args, "Oi", &node, &format)) return nullptr; + return self->p->Encode(node, format); +} - static bool HasEndOffset(const void *node) { - return HasAttribute(node, "end_position"); - } +static PyMethodDef PythonContextExt_methods[] = { + {"root", (PyCFunction) PythonContextExt_root, METH_NOARGS, + "Return the root node attached to this query context" + }, + {"load", (PyCFunction) PythonContextExt_load, METH_NOARGS, + "Load external node to Python" + }, + {"filter", (PyCFunction) PythonContextExt_filter, METH_VARARGS | METH_KEYWORDS, + "Filter a provided UAST with XPath" + }, + {"encode", (PyCFunction) PythonContextExt_encode, METH_VARARGS, + "Encodes a UAST into a buffer" + }, + {nullptr} // Sentinel +}; - static uint32_t EndOffset(const void *node) { - return PositionValue(node, "end_position", "offset"); - } +extern "C" +{ + static PyTypeObject PythonContextExtType = { + PyVarObject_HEAD_INIT(nullptr, 0) + "pyuast.ContextExt", // tp_name + sizeof(PythonContextExt), // tp_basicsize + 0, // tp_itemsize + PythonContextExt_dealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "Internal ContextExt object", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter: __iter()__ method + 0, // tp_iternext: next() method + PythonContextExt_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; +} - static bool HasEndLine(const void *node) { - return HasAttribute(node, "end_position"); - } +// ================================================ +// Python UAST Node interface (called from libuast) +// ================================================ + +class Interface; + +class Node : public uast::Node { +private: + Interface* ctx; + PyObject* obj; // Node owns a reference + NodeKind kind; + + PyObject* keys; + std::string* str; + + // checkPyException checks a Python error status, and if it's set, throws an error. + static void checkPyException() { + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + if (value == nullptr || value == Py_None) return; + + if (type) Py_DECREF(type); + if (traceback) Py_DECREF(traceback); + + PyObject* str = PyObject_Str(value); + Py_DECREF(value); + + auto err = PyUnicode_AsUTF8(str); + Py_DECREF(str); + + throw std::runtime_error(err); + } + + // kindOf returns a kind of a Python object. + // Borrows the reference. 
+ static NodeKind kindOf(PyObject* obj) { + if (!obj || obj == Py_None) { + return NODE_NULL; + } else if (PyUnicode_Check(obj)) { + return NODE_STRING; + } else if (PyLong_Check(obj)) { + return NODE_INT; + } else if (PyFloat_Check(obj)) { + return NODE_FLOAT; + } else if (PyBool_Check(obj)) { + return NODE_BOOL; + } else if (PyList_Check(obj)) { + return NODE_ARRAY; + } + return NODE_OBJECT; + } + Node* lookupOrCreate(PyObject* obj); +public: + friend class Interface; + friend class Context; + + // Node creates a new node associated with a given Python object and sets the kind. + // Steals the reference. + Node(Interface* c, NodeKind k, PyObject* v) : keys(nullptr), str(nullptr) { + ctx = c; + obj = v; + kind = k; + } + // Node creates a new node associated with a given Python object and automatically determines the kind. + // Creates a new reference. + Node(Interface* c, PyObject* v) : keys(nullptr), str(nullptr) { + ctx = c; + obj = v; Py_INCREF(v); + kind = kindOf(v); + } + ~Node(){ + if (keys) { + Py_DECREF(keys); + keys = nullptr; + } + if (obj) Py_DECREF(obj); + if (str) delete str; + + } + + PyObject* toPy(); + + NodeKind Kind() { + return kind; + } + std::string* AsString() { + if (!str) { + const char* s = PyUnicode_AsUTF8(obj); + str = new std::string(s); + } + + std::string* s = new std::string(*str); + return s; + } + int64_t AsInt() { + long long v = PyLong_AsLongLong(obj); + return (int64_t)(v); + } + uint64_t AsUint() { + unsigned long long v = PyLong_AsUnsignedLongLong(obj); + return (uint64_t)(v); + } + double AsFloat() { + double v = PyFloat_AsDouble(obj); + return (double)(v); + } + bool AsBool() { + return obj == Py_True; + } + + size_t Size() { + if (obj == Py_None) return 0; + + size_t sz = 0; + if (PyList_Check(obj)) { + sz = (size_t)(PyList_Size(obj)); + } else { + sz = (size_t)(PyObject_Size(obj)); + } + if (int64_t(sz) == -1) { + checkPyException(); + return 0; // error + } + assert(int64_t(sz) >= 0); + return sz; + } + + std::string* KeyAt(size_t i) { + if (obj == Py_None) return nullptr; + + if (!keys) keys = PyDict_Keys(obj); + if (!keys) return nullptr; + + PyObject* key = PyList_GetItem(keys, i); // borrows + const char * k = PyUnicode_AsUTF8(key); + + std::string* s = new std::string(k); + return s; + } + Node* ValueAt(size_t i) { + if (obj == Py_None) return nullptr; + + if (PyList_Check(obj)) { + PyObject* v = PyList_GetItem(obj, i); // borrows + return lookupOrCreate(v); // new ref + } + if (!keys) keys = PyDict_Keys(obj); + PyObject* key = PyList_GetItem(keys, i); // borrows + PyObject* val = PyDict_GetItem(obj, key); // borrows + + return lookupOrCreate(val); // new ref + } + + void SetValue(size_t i, Node* val) { + PyObject* v = nullptr; + if (val && val->obj) { + v = val->obj; + } else { + v = Py_None; + } + Py_INCREF(v); + PyList_SetItem(obj, i, v); // steals + } + void SetKeyValue(std::string k, Node* val) { + PyObject* v = nullptr; + if (val && val->obj) { + v = val->obj; + } else { + v = Py_None; + } + PyDict_SetItemString(obj, k.data(), v); // new ref + } +}; - static uint32_t EndLine(const void *node) { - return PositionValue(node, "end_position", "line"); - } +// =========================================== +// Python UAST interface (called from libuast) +// =========================================== + +class Context; + +class Interface : public uast::NodeCreator { +private: + std::map obj2node; + + static PyObject* newBool(bool v) { + if (v) Py_RETURN_TRUE; + + Py_RETURN_FALSE; + } + + // lookupOrCreate either creates a new object or returns 
existing one. + // In the second case it creates a new reference. + Node* lookupOrCreate(PyObject* obj) { + if (!obj || obj == Py_None) return nullptr; + + Node* node = obj2node[obj]; + if (node) return node; + + node = new Node(this, obj); + obj2node[obj] = node; + return node; + } + + // create makes a new object with a specified kind. + // Steals the reference. + Node* create(NodeKind kind, PyObject* obj) { + Node* node = new Node(this, kind, obj); + obj2node[obj] = node; + return node; + } +public: + friend class Node; + friend class Context; + + Interface(){ + } + ~Interface(){ + // Only needs to deallocate Nodes, since they own + // the same object as used in the map key. + for (auto it : obj2node) { + delete(it.second); + } + } + + // toNode creates a new or returns an existing node associated with Python object. + // Creates a new reference. + Node* toNode(PyObject* obj){ + return lookupOrCreate(obj); + } + + // toPy returns a Python object associated with a node. + // Returns a new reference. + PyObject* toPy(Node* node) { + if (node == nullptr) Py_RETURN_NONE; + Py_INCREF(node->obj); + return node->obj; + } + + Node* NewObject(size_t size) { + PyObject* m = PyDict_New(); + return create(NODE_OBJECT, m); + } + Node* NewArray(size_t size) { + PyObject* arr = PyList_New(size); + return create(NODE_ARRAY, arr); + } + Node* NewString(std::string v) { + PyObject* obj = PyUnicode_FromString(v.data()); + return create(NODE_STRING, obj); + } + Node* NewInt(int64_t v) { + PyObject* obj = PyLong_FromLongLong(v); + return create(NODE_INT, obj); + } + Node* NewUint(uint64_t v) { + PyObject* obj = PyLong_FromUnsignedLongLong(v); + return create(NODE_UINT, obj); + } + Node* NewFloat(double v) { + PyObject* obj = PyFloat_FromDouble(v); + return create(NODE_FLOAT, obj); + } + Node* NewBool(bool v) { + PyObject* obj = newBool(v); + return create(NODE_BOOL, obj); + } +}; - static bool HasEndCol(const void *node) { - return HasAttribute(node, "end_position"); - } +// toPy returns a Python object associated with a node. +// Returns a new reference. +PyObject* Node::toPy() { + return ctx->toPy(this); +} - static uint32_t EndCol(const void *node) { - return PositionValue(node, "end_position", "col"); - } +// lookupOrCreate either creates a new object or returns existing one. +// In the second case it creates a new reference. 
+Node* Node::lookupOrCreate(PyObject* obj) { + return ctx->lookupOrCreate(obj); } -static Uast *ctx; -///////////////////////////////////// -/////////// PYTHON API ////////////// -///////////////////////////////////// +// ========================================== +// Python UAST iterator +// ========================================== + typedef struct { PyObject_HEAD - UastIterator *iter; + Context *ctx; + uast::Iterator *iter; + bool freeCtx; } PyUastIter; -// iterator.__iter__() -static PyObject *PyUastIter_iter(PyObject *self) -{ +static void PyUastIter_dealloc(PyObject *self); + +static PyObject *PyUastIter_iter(PyObject *self) { Py_INCREF(self); return self; } -// iterator.__next__() -static PyObject *PyUastIter_next(PyObject *self) -{ - - PyUastIter *it = (PyUastIter *)self; - - void *node = UastIteratorNext(it->iter); - if (!node) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; +static PyObject *PyUastIter_next(PyObject *self) { + auto it = (PyUastIter *)self; + + try { + if (!it->iter->next()) { + PyErr_SetNone(PyExc_StopIteration); + return nullptr; + } + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; } - Py_INCREF(node); - memTracker.SetCurrentIterator(it->iter); - return (PyObject *)node; -} + Node* node = it->iter->node(); + if (!node) Py_RETURN_NONE; -// Forward declaration for the Type ref -static PyObject *PyUastIter_new(PyObject *self, PyObject *args); -static void PyUastIter_dealloc(PyObject *self); + return node->toPy(); // new ref +} extern "C" { static PyTypeObject PyUastIterType = { - PyVarObject_HEAD_INIT(NULL, 0) - "pyuast.UastIterator", // tp_name + PyVarObject_HEAD_INIT(nullptr, 0) + "pyuast.Iterator", // tp_name sizeof(PyUastIter), // tp_basicsize 0, // tp_itemsize PyUastIter_dealloc, // tp_dealloc @@ -306,200 +773,319 @@ extern "C" }; } -static PyObject *PyUastIter_new(PyObject *self, PyObject *args) -{ - void *node = NULL; - uint8_t order; +// ========================================== +// Python UAST Context object +// ========================================== + +class Context { +private: + Interface *iface; + uast::PtrInterface *impl; + uast::Context *ctx; + + // toPy returns a Python object associated with a node. + // Returns a new reference. + PyObject* toPy(Node* node) { + if (node == nullptr) Py_RETURN_NONE; + return iface->toPy(node); + } + // toNode returns a node associated with a Python object. + // Creates a new reference. + Node* toNode(PyObject* obj) { + return iface->lookupOrCreate(obj); + } + PyObject* newIter(uast::Iterator *it, bool freeCtx){ + PyUastIter *pyIt = PyObject_New(PyUastIter, &PyUastIterType); + if (!pyIt) return nullptr; + + if (!PyObject_Init((PyObject *)pyIt, &PyUastIterType)) { + Py_DECREF(pyIt); + return nullptr; + } + pyIt->iter = it; + pyIt->ctx = this; + pyIt->freeCtx = freeCtx; + return (PyObject*)pyIt; + } +public: + Context(){ + // create a class that makes and tracks UAST nodes + iface = new Interface(); + // create an implementation that will handle libuast calls + impl = new uast::PtrInterface(iface); + // create a new UAST context based on this implementation + ctx = impl->NewContext(); + } + ~Context(){ + delete(ctx); + delete(impl); + delete(iface); + } + + // RootNode returns a root UAST node, if set. + // Returns a new reference. + PyObject* RootNode(){ + Node* root = ctx->RootNode(); + return toPy(root); // new ref + } + + // Iterate enumerates UAST nodes in a specified order. + // Creates a new reference. 
+ PyObject* Iterate(PyObject* node, TreeOrder order, bool freeCtx){ + if (!assertNotContext(node)) return nullptr; + + Node* unode = toNode(node); + auto iter = ctx->Iterate(unode, order); + return newIter(iter, freeCtx); + } + + // Filter queries UAST. + // Creates a new reference. + PyObject* Filter(PyObject* node, std::string query){ + if (!assertNotContext(node)) return nullptr; + + Node* unode = toNode(node); + if (unode == nullptr) unode = ctx->RootNode(); + + auto it = ctx->Filter(unode, query); + return newIter(it, false); + } + // Encode serializes UAST. + // Creates a new reference. + PyObject* Encode(PyObject *node, UastFormat format) { + if (!assertNotContext(node)) return nullptr; + + uast::Buffer data = ctx->Encode(toNode(node), format); + return asPyBuffer(data); + } + PyObject* LoadFrom(PyNodeExt *src) { + auto sctx = src->ctx->ctx; + NodeHandle snode = src->handle; + + Node* node = uast::Load(sctx, snode, ctx); + return toPy(node); // new ref + } +}; - if (!PyArg_ParseTuple(args, "OB", &node, &order)) - return NULL; +static PyObject *PyNodeExt_load(PyNodeExt *self, PyObject *Py_UNUSED(ignored)) { + auto ctx = new Context(); + PyObject* node = ctx->LoadFrom(self); + delete(ctx); + return node; +} - PyUastIter *pyIt = PyObject_New(PyUastIter, &PyUastIterType); - if (!pyIt) - return NULL; +static void PyUastIter_dealloc(PyObject *self) { + auto it = (PyUastIter *)self; + delete(it->iter); - if (!PyObject_Init((PyObject *)pyIt, &PyUastIterType)) { - Py_DECREF(pyIt); - return NULL; - } + if (it->freeCtx && it->ctx) delete(it->ctx); - pyIt->iter = UastIteratorNew(ctx, node, (TreeOrder)order); - if (!pyIt->iter) { - Py_DECREF(pyIt); - return NULL; - } - - memTracker.ClearCurrentIterator(); - memTracker.SetCurrentIterator(pyIt->iter); - return (PyObject*)pyIt; + it->freeCtx = false; + it->ctx = nullptr; + Py_TYPE(self)->tp_free(self); } +typedef struct { + PyObject_HEAD + Context *p; +} PythonContext; -static void PyUastIter_dealloc(PyObject *self) -{ - memTracker.DisposeMem(); - UastIteratorFree(((PyUastIter *)self)->iter); +static void PythonContext_dealloc(PyObject *self) { + delete(((PythonContext *)self)->p); + Py_TYPE(self)->tp_free(self); } -static bool initFilter(PyObject *args, PyObject **obj, const char **query) -{ - if (!PyArg_ParseTuple(args, "Os", obj, query)) { - return false; - } - - memTracker.EnterFilter(); - return true; +static PyObject *PythonContext_root(PythonContext *self, PyObject *Py_UNUSED(ignored)) { + return self->p->RootNode(); } -static void cleanupFilter(void) -{ - - memTracker.DisposeMem(); - memTracker.ExitFilter(); +static PyObject *PythonContext_filter(PythonContext *self, PyObject *args, PyObject *kwargs) { + char* kwds[] = {(char*)"query", (char*)"node", NULL}; + const char *query = nullptr; + PyObject *node = nullptr; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) + return nullptr; + + PyObject* it = nullptr; + try { + it = self->p->Filter(node, query); + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + return it; } -static void filterError(void) -{ - char *error = LastError(); - PyErr_SetString(PyExc_RuntimeError, error); - free(error); - cleanupFilter(); +static PyObject *PythonContext_encode(PythonContext *self, PyObject *args) { + PyObject *node = nullptr; + UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum + if (!PyArg_ParseTuple(args, "Oi", &node, &format)) return nullptr; + return self->p->Encode(node, format); } -static PyObject *PyFilter(PyObject 
*self, PyObject *args) +static PyMethodDef PythonContext_methods[] = { + {"root", (PyCFunction) PythonContext_root, METH_NOARGS, + "Return the root node attached to this query context" + }, + {"filter", (PyCFunction) PythonContext_filter, METH_VARARGS | METH_KEYWORDS, + "Filter a provided UAST with XPath" + }, + {"encode", (PyCFunction) PythonContext_encode, METH_VARARGS, + "Encodes a UAST into a buffer" + }, + {nullptr} // Sentinel +}; + +extern "C" { - PyObject *obj = NULL; - const char *query = NULL; + static PyTypeObject PythonContextType = { + PyVarObject_HEAD_INIT(nullptr, 0) + "pyuast.Context", // tp_name + sizeof(PythonContext), // tp_basicsize + 0, // tp_itemsize + PythonContext_dealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "Internal Context object", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter: __iter()__ method + 0, // tp_iternext: next() method + PythonContext_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; +} - if (!initFilter(args, &obj, &query)) { - return NULL; - } +// ========================================== +// Global functions +// ========================================== - Nodes *nodes = UastFilter(ctx, obj, query); - if (!nodes) { - filterError(); - cleanupFilter(); - return NULL; - } +static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { + PyObject *obj = nullptr; + uint8_t order; - size_t len = NodesSize(nodes); - PyObject *list = PyList_New(len); + if (!PyArg_ParseTuple(args, "OB", &obj, &order)) return nullptr; - for (size_t i = 0; i < len; i++) { - PyObject *node = (PyObject *)NodeAt(nodes, i); - Py_INCREF(node); - PyList_SET_ITEM(list, i, node); + // the node can either be external or any other Python object + if (PyObject_TypeCheck(obj, &PyNodeExtType)) { + // external node -> external iterator + auto node = (PyNodeExt*)obj; + return node->ctx->Iterate(obj, (TreeOrder)order); } - NodesFree(nodes); - PyObject *iter = PySeqIter_New(list); - Py_DECREF(list); - - cleanupFilter(); - return iter; + // Python object -> create a new context and attach it to an iterator + Context* ctx = new Context(); + return ctx->Iterate(obj, (TreeOrder)order, true); } -static PyObject *PyFilterBool(PyObject *self, PyObject *args) -{ - PyObject *obj = NULL; - const char *query = NULL; +static PyObject *PythonContextExt_decode(PyObject *self, PyObject *args, PyObject *kwargs) { + char* kwds[] = {(char*)"data", (char*)"format", NULL}; + PyObject *obj = nullptr; + UastFormat format = UAST_BINARY; // TODO: make it an enum - if (!initFilter(args, &obj, &query)) - return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i", kwds, &obj, &format)) + return nullptr; - bool ok; - bool res = UastFilterBool(ctx, obj, query, &ok); - if (!ok) { - filterError(); - return NULL; - } + Py_buffer buf; - cleanupFilter(); - return res ? 
Py_True : Py_False; -} + int res = PyObject_GetBuffer(obj, &buf, PyBUF_C_CONTIGUOUS); + if (res != 0) return nullptr; -static PyObject *PyFilterNumber(PyObject *self, PyObject *args) -{ - PyObject *obj = NULL; - const char *query = NULL; + uast::Buffer ubuf(buf.buf, (size_t)(buf.len)); - if (!initFilter(args, &obj, &query)) - return NULL; + uast::Context* ctx = uast::Decode(ubuf, format); + PyBuffer_Release(&buf); - bool ok; - double res = UastFilterNumber(ctx, obj, query, &ok); - if (!ok) { - filterError(); - return NULL; - } - - cleanupFilter(); - return PyFloat_FromDouble(res); + PythonContextExt *pyU = PyObject_New(PythonContextExt, &PythonContextExtType); + if (!pyU) { + delete(ctx); + return nullptr; + } + pyU->p = new ContextExt(ctx); + return (PyObject*)pyU; } -static PyObject *PyFilterString(PyObject *self, PyObject *args) -{ - PyObject *obj = NULL; - const char *query = NULL; +static PyObject *PythonContext_new(PyObject *self, PyObject *args) { + // TODO: optionally accept root object + if (!PyArg_ParseTuple(args, "")) return nullptr; - if (!initFilter(args, &obj, &query)) - return NULL; - const char *res = UastFilterString(ctx, obj, query); - if (res == NULL) { - filterError(); - return NULL; - } + PythonContext *pyU = PyObject_New(PythonContext, &PythonContextType); + if (!pyU) return nullptr; - cleanupFilter(); - return PyUnicode_FromString(res); + pyU->p = new Context(); + return (PyObject*)pyU; } + +bool isContext(PyObject* obj) { + if (!obj || obj == Py_None) return false; + return PyObject_TypeCheck(obj, &PythonContextExtType) || PyObject_TypeCheck(obj, &PythonContextType); +} + static PyMethodDef extension_methods[] = { - {"filter", PyFilter, METH_VARARGS, "Filter nodes in the UAST using the given query"}, {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, - {"filter_bool", PyFilterBool, METH_VARARGS, "For queries returning boolean values"}, - {"filter_number", PyFilterNumber, METH_VARARGS, "For queries returning boolean values"}, - {"filter_string", PyFilterString, METH_VARARGS, "For queries returning boolean values"}, - {NULL, NULL, 0, NULL} + {"decode", (PyCFunction)PythonContextExt_decode, METH_VARARGS | METH_KEYWORDS, "Decode UAST from a byte array"}, + {"uast", PythonContext_new, METH_VARARGS, "Creates a new UAST context"}, + {nullptr, nullptr, 0, nullptr} }; static struct PyModuleDef module_def = { PyModuleDef_HEAD_INIT, "pyuast", - NULL, + nullptr, -1, extension_methods, - NULL, - NULL, - NULL, - NULL + nullptr, + nullptr, + nullptr, + nullptr }; PyMODINIT_FUNC PyInit_pyuast(void) { - NodeIface iface; - iface.InternalType = InternalType; - iface.Token = Token; - iface.ChildrenSize = ChildrenSize; - iface.ChildAt = ChildAt; - iface.RolesSize = RolesSize; - iface.RoleAt = RoleAt; - iface.PropertiesSize = PropertiesSize; - iface.PropertyKeyAt = PropertyKeyAt; - iface.PropertyValueAt = PropertyValueAt; - iface.HasStartOffset = HasStartOffset; - iface.StartOffset = StartOffset; - iface.HasStartLine = HasStartLine; - iface.StartLine = StartLine; - iface.HasStartCol = HasStartCol; - iface.StartCol = StartCol; - iface.HasEndOffset = HasEndOffset; - iface.EndOffset = EndOffset; - iface.HasEndLine = HasEndLine; - iface.EndLine = EndLine; - iface.HasEndCol = HasEndCol; - iface.EndCol = EndCol; - - ctx = UastNew(iface); - return PyModule_Create(&module_def); + if (PyType_Ready(&PythonContextExtType) < 0) return nullptr; + if (PyType_Ready(&PyNodeExtType) < 0) return nullptr; + if (PyType_Ready(&PyUastIterExtType) < 0) return nullptr; + + if 
(PyType_Ready(&PythonContextType) < 0) return nullptr; + if (PyType_Ready(&PyUastIterType) < 0) return nullptr; + + PyObject* m = PyModule_Create(&module_def); + + Py_INCREF(&PythonContextType); + PyModule_AddObject(m, "Context", (PyObject *)&PythonContextType); + + Py_INCREF(&PyNodeExtType); + PyModule_AddObject(m, "NodeExt", (PyObject *)&PyNodeExtType); + + Py_INCREF(&PyUastIterExtType); + PyModule_AddObject(m, "IteratorExt", (PyObject *)&PyUastIterExtType); + + Py_INCREF(&PyUastIterType); + PyModule_AddObject(m, "Iterator", (PyObject *)&PyUastIterType); + + return m; } diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py new file mode 100644 index 0000000..f985074 --- /dev/null +++ b/bblfsh/result_context.py @@ -0,0 +1,129 @@ +import typing as t + +from bblfsh.aliases import ParseResponse +from bblfsh.pyuast import decode, IteratorExt, NodeExt, iterator +from bblfsh.tree_order import TreeOrder + + +class ResponseError(Exception): + pass + + +class ResultTypeException(Exception): + pass + + +class NotNodeIterationException(Exception): + pass + + +# ResultMultiType = t.NewType("ResultMultiType", t.Union[dict, int, float, bool, str]) +ResultMultiType = t.Union[dict, int, float, bool, str, None] + + +class Node: + def __init__(self, node_ext: NodeExt) -> None: + self._node_ext = node_ext + self._loaded_node: ResultMultiType = None + + def _ensure_load(self) -> None: + if self._loaded_node is None: + self._loaded_node = self._node_ext.load() + + def __str__(self) -> str: + return str(self.get()) + + def __repr__(self) -> str: + return repr(self.get()) + + def get(self) -> ResultMultiType: + self._ensure_load() + return self._loaded_node + + def _get_typed(self, type_: t.Union[type, t.List[type]]) -> ResultMultiType: + self._ensure_load() + + if not isinstance(type_, list) and not isinstance(type_, tuple): + type_list = [type_] + else: + type_list = type_ + + if type(self._loaded_node) not in type_list: + raise ResultTypeException("Expected {} result, but type is '{}'" + .format(str(type_list), type(self._loaded_node))) + return self._loaded_node + + def get_bool(self) -> bool: + return t.cast(bool, self._get_typed(bool)) + + def get_float(self) -> float: + res: ResultMultiType = self._get_typed([float, int]) + if isinstance(res, int): + res = float(res) + return t.cast(float, res) + + def get_int(self) -> int: + return t.cast(int, self._get_typed(int)) + + def get_str(self) -> str: + return t.cast(str, self._get_typed(str)) + + def get_dict(self) -> dict: + return t.cast(dict, self._get_typed(dict)) + + def iterate(self, order: int) -> 'NodeIterator': + if not isinstance(self._node_ext, NodeExt): + raise NotNodeIterationException("Cannot iterate over leaf of type '{}'" + .format(type(self._node_ext))) + TreeOrder.check_order(order) + return NodeIterator(iterator(self._node_ext, order)) + + +class NodeIterator: + def __init__(self, iter_ext: IteratorExt) -> None: + self._iter_ext = iter_ext + + def __iter__(self) -> 'NodeIterator': + return self + + def __next__(self) -> Node: + return Node(next(self._iter_ext)) + + def iterate(self, order: int) -> 'NodeIterator': + TreeOrder.check_order(order) + return NodeIterator(iterator(next(self._iter_ext), order)) + + +class ResultContext: + def __init__(self, grpc_response: ParseResponse) -> None: + if grpc_response.errors: + raise ResponseError("\n".join( + [error.text for error in grpc_response.errors]) + ) + + self._response = grpc_response + self._ctx = decode(grpc_response.uast, format=0) + + def filter(self, query: str) -> 
NodeIterator: + return NodeIterator(self._ctx.filter(query)) + + def get_all(self) -> dict: + return self._ctx.load() + + def iterate(self, order: int) -> NodeIterator: + TreeOrder.check_order(order) + return NodeIterator(iterator(self._ctx.root(), order)) + + @property + def language(self) -> str: + return self._response.language + + @property + def uast(self) -> t.Any: + return self._response.uast + + def __str__(self) -> str: + return str(self.get_all()) + + def __repr__(self) -> str: + return repr(self.get_all()) diff --git a/bblfsh/sdkversion.py b/bblfsh/sdkversion.py deleted file mode 100644 index bc55b42..0000000 --- a/bblfsh/sdkversion.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = "v1" diff --git a/bblfsh/test.py b/bblfsh/test.py index 27e7933..53afbde 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -1,310 +1,347 @@ -import os import resource +import typing as t import unittest import docker -from bblfsh import (BblfshClient, filter, iterator, role_id, - role_name, Node, ParseResponse, TreeOrder, filter_bool, - filter_number, filter_string) +from bblfsh import (BblfshClient, iterator, TreeOrder, + Modes, role_id, role_name) from bblfsh.launcher import ensure_bblfsh_is_running from bblfsh.client import NonUTF8ContentException +from bblfsh.result_context import (Node, NodeIterator, + ResultContext, ResultTypeException) +from bblfsh.pyuast import uast class BblfshTests(unittest.TestCase): BBLFSH_SERVER_EXISTED = None + fixtures_file = "fixtures/test.py" @classmethod - def setUpClass(cls): + def setUpClass(cls: t.Any) -> None: cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running() @classmethod - def tearDownClass(cls): + def tearDownClass(cls: t.Any) -> None: if not cls.BBLFSH_SERVER_EXISTED: client = docker.from_env(version="auto") client.containers.get("bblfshd").remove(force=True) client.api.close() - def setUp(self): + def setUp(self) -> None: self.client = BblfshClient("0.0.0.0:9432") - def testVersion(self): + def _parse_fixture(self) -> ResultContext: + ctx = self.client.parse(self.fixtures_file) + self._validate_ctx(ctx) + + return ctx + + def testVersion(self) -> None: version = self.client.version() self.assertTrue(hasattr(version, "version")) self.assertTrue(version.version) self.assertTrue(hasattr(version, "build")) self.assertTrue(version.build) - def testNativeParse(self): - reply = self.client.native_parse(__file__) - assert(reply.ast) + def testNativeParse(self) -> None: + ctx = self.client.parse(self.fixtures_file, mode=Modes.NATIVE) + self._validate_ctx(ctx) + self.assertIsNotNone(ctx) - def testNonUTF8ParseError(self): + it = ctx.filter("//*[@ast_type='NoopLine']") + self.assertIsNotNone(it) + self.assertIsInstance(it, NodeIterator) + res = list(it) + self.assertGreater(len(res), 1) + for i in res: + t = i.get_dict().get("ast_type") + self.assertIsNotNone(t) + self.assertEqual(t, "NoopLine") + + def testNonUTF8ParseError(self) -> None: self.assertRaises(NonUTF8ContentException, self.client.parse, "", "Python", b"a = '\x80abc'") - def testUASTDefaultLanguage(self): - res = self.client.parse(__file__) - print(res) - self._validate_resp(self.client.parse(__file__)) + def testUASTDefaultLanguage(self) -> None: + ctx = self._parse_fixture() + self.assertEqual(ctx.language, "python") - def testUASTPython(self): - self._validate_resp(self.client.parse(__file__, language="Python")) + def testUASTWithLanguage(self) -> None: + ctx = self.client.parse(self.fixtures_file, language="Python") + self._validate_ctx(ctx) + self.assertEqual(ctx.language, "python") - def 
testUASTFileContents(self): - with open(__file__, "rb") as fin: + def testUASTFileContents(self) -> None: + with open(self.fixtures_file, "r") as fin: contents = fin.read() - resp = self.client.parse("file.py", contents=contents) - self._validate_resp(resp) - self._validate_filter(resp) - - def testBrokenFilter(self): - self.assertRaises(RuntimeError, filter, 0, "foo") - - def testFilterInternalType(self): - node = Node() - node.internal_type = 'a' - self.assertTrue(any(filter(node, "//a"))) - self.assertFalse(any(filter(node, "//b"))) - - def testFilterToken(self): - node = Node() - node.token = 'a' - self.assertTrue(any(filter(node, "//*[@token='a']"))) - self.assertFalse(any(filter(node, "//*[@token='b']"))) - - def testFilterRoles(self): - node = Node() - node.roles.append(1) - self.assertTrue(any(filter(node, "//*[@roleIdentifier]"))) - self.assertFalse(any(filter(node, "//*[@roleQualified]"))) - - def testFilterProperties(self): - node = Node() - node.properties['k1'] = 'v2' - node.properties['k2'] = 'v1' - self.assertTrue(any(filter(node, "//*[@k2='v1']"))) - self.assertTrue(any(filter(node, "//*[@k1='v2']"))) - self.assertFalse(any(filter(node, "//*[@k1='v1']"))) - - def testFilterStartOffset(self): - node = Node() - node.start_position.offset = 100 - self.assertTrue(any(filter(node, "//*[@startOffset=100]"))) - self.assertFalse(any(filter(node, "//*[@startOffset=10]"))) - - def testFilterStartLine(self): - node = Node() - node.start_position.line = 10 - self.assertTrue(any(filter(node, "//*[@startLine=10]"))) - self.assertFalse(any(filter(node, "//*[@startLine=100]"))) - - def testFilterStartCol(self): - node = Node() - node.start_position.col = 50 - self.assertTrue(any(filter(node, "//*[@startCol=50]"))) - self.assertFalse(any(filter(node, "//*[@startCol=5]"))) - - def testFilterEndOffset(self): - node = Node() - node.end_position.offset = 100 - self.assertTrue(any(filter(node, "//*[@endOffset=100]"))) - self.assertFalse(any(filter(node, "//*[@endOffset=10]"))) - - def testFilterEndLine(self): - node = Node() - node.end_position.line = 10 - self.assertTrue(any(filter(node, "//*[@endLine=10]"))) - self.assertFalse(any(filter(node, "//*[@endLine=100]"))) - - def testFilterEndCol(self): - node = Node() - node.end_position.col = 50 - self.assertTrue(any(filter(node, "//*[@endCol=50]"))) - self.assertFalse(any(filter(node, "//*[@endCol=5]"))) - - def testFilterBool(self): - node = Node() - self.assertTrue(filter_bool(node, "boolean(//*[@startOffset or @endOffset])")) - self.assertFalse(filter_bool(node, "boolean(//*[@blah])")) - - def testFilterNumber(self): - node = Node() - node.children.extend([Node(), Node(), Node()]) - self.assertEqual(int(filter_number(node, "count(//*)")), 4) - - def testFilterString(self): - node = Node() - node.internal_type = "test" - self.assertEqual(filter_string(node, "name(//*[1])"), "test") - - def testFilterBadQuery(self): - node = Node() - self.assertRaises(RuntimeError, filter, node, "//*roleModule") - - def testFilterBadType(self): - node = Node() - node.end_position.col = 50 - self.assertRaises(RuntimeError, filter, node, "boolean(//*[@startPosition or @endPosition])") - - def testRoleIdName(self): - self.assertEqual(role_id(role_name(1)), 1) - self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") - def _itTestTree(self): - root = Node() - root.internal_type = 'root' - root.start_position.offset = 0 - root.start_position.line = 0 - root.start_position.col = 1 + ctx = self.client.parse("file.py", contents=contents) + 
self._validate_ctx(ctx) - son1 = Node() - son1.internal_type = 'son1' - son1.start_position.offset = 1 + def assert_strnode(n: Node, expected: str) -> None: + self.assertEqual(n.get(), expected) + self.assertIsInstance(n.get_str(), str) + self.assertEqual(n.get_str(), expected) - son1_1 = Node() - son1_1.internal_type = 'son1_1' - son1_1.start_position.offset = 10 + it = ctx.filter("//uast:RuntimeImport/Path/uast:Alias/Name/uast:Identifier/Name") + self.assertIsInstance(it, NodeIterator) - son1_2 = Node() - son1_2.internal_type = 'son1_2' - son1_2.start_position.offset = 10 + assert_strnode(next(it), "os") + assert_strnode(next(it), "resource") + assert_strnode(next(it), "unittest") + assert_strnode(next(it), "docker") + assert_strnode(next(it), "bblfsh") + self.assertRaises(StopIteration, next, it) - son1.children.extend([son1_1, son1_2]) + def testBrokenFilter(self) -> None: + ctx = self._parse_fixture() - son2 = Node() - son2.internal_type = 'son2' - son2.start_position.offset = 100 + self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$") - son2_1 = Node() - son2_1.internal_type = 'son2_1' - son2_1.start_position.offset = 5 - - son2_2 = Node() - son2_2.internal_type = 'son2_2' - son2_2.start_position.offset = 15 + # FIXME: Uncomment once https://github.com/bblfsh/sdk/issues/340 is fixed + def testFilterToken(self): + ctx = self._parse_fixture() + it = ctx.filter("//*[@token='else']/@token") + print(next(it)) + # Problem: returns the node containing the @token, not the @token string ("else") + # first = next(it).get_str() + # self.assertEqual(first, "else") + + def testFilterRoles(self) -> None: + ctx = self._parse_fixture() + it = ctx.filter("//*[@role='Identifier']") + self.assertIsInstance(it, NodeIterator) + + l = list(it) + self.assertGreater(len(l), 0) + + it = ctx.filter("//*[@role='Friend']") + self.assertIsInstance(it, NodeIterator) + l = list(it) + self.assertEqual(len(l), 0) + + def testFilterProperties(self) -> None: + ctx = uast() + obj = {"k1":"v1", "k2": "v2"} + self.assertTrue(any(ctx.filter("/*[@k1='v1']", obj))) + self.assertTrue(any(ctx.filter("/*[@k2='v2']", obj))) + self.assertFalse(any(ctx.filter("/*[@k2='v1']", obj))) + self.assertFalse(any(ctx.filter("/*[@k1='v2']", obj))) + + def testFilterStartOffset(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@offset=11749]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@offset=99999]"))) + + def testFilterStartLine(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@line=295]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@line=99999]"))) + + def testFilterStartCol(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@col=42]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@col=99999]"))) + + def testFilterEndOffset(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@offset=11757]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@offset=99999]"))) + + def testFilterEndLine(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@line=321]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@line=99999]"))) + + def testFilterEndCol(self) -> None: + ctx = 
self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@col=49]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@col=99999]"))) + + def testFilterBool(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")) + self.assertTrue(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")).get()) + self.assertTrue(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")).get_bool()) + + self.assertFalse(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=9999])")).get()) + self.assertFalse(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=9999])")).get_bool()) + + def testFilterNumber(self) -> None: + ctx = self._parse_fixture() + self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get(), 2) + self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get_int(), 2) + self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get_float(), 2.0) + + def testFilterString(self) -> None: + ctx = self._parse_fixture() + self.assertEqual(next(ctx.filter("name(//uast:Positions)")).get(), "uast:Positions") + self.assertEqual(next(ctx.filter("name(//uast:Positions)")).get_str(), "uast:Positions") + + def testFilterBadQuery(self) -> None: + ctx = uast() + self.assertRaises(RuntimeError, ctx.filter, "//[@roleModule]", {}) + + def testFilterBadType(self) -> None: + ctx = self._parse_fixture() + res = next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")) + self.assertRaises(ResultTypeException, res.get_str) + + def testRoleIdName(self) -> None: + self.assertEqual(role_id(role_name(1)), 1) + self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") - son2.children.extend([son2_1, son2_2]) - root.children.extend([son1, son2]) + @staticmethod + def _itTestTree() -> dict: + def set_position(node: dict, start_offset: int, start_line: int, start_col: int, + end_offset: int, end_line: int, end_col: int) -> None: + node["@pos"] = { + "@type": "uast:Positions", + "start": { + "@type": "uast:Position", + "offset": start_offset, + "line": start_line, + "col": start_col + }, + "end": { + "@type": "uast:Position", + "offset": end_offset, + "line": end_line, + "col": end_col + } + } + root = {"@type": "root"} + set_position(root, 0,1,1, 1,1,2) + + son1 = {"@type": "son1"} + set_position(son1, 2,2,2, 3,2,3) + + son1_1 = {"@type": "son1_1"} + set_position(son1_1, 10,10,1, 12,2,2) + + son1_2 = {"@type": "son1_2"} + set_position(son1_2, 10,10,1, 12,2,2) + + son1["children"] = [son1_1, son1_2] + + son2 = {"@type": "son2"} + set_position(son2, 100,100,1, 101,100,2) + + son2_1 = {"@type": "son2_1"} + set_position(son2_1, 5,5,1, 6,5,2) + + son2_2 = {"@type": "son2_2"} + set_position(son2_2, 15,15,1, 16,15,2) + + son2["children"] = [son2_1, son2_2] + root["children"] = [son1, son2] return root - def testIteratorPreOrder(self): + @staticmethod + def _get_nodetypes(iterator: NodeIterator) -> t.List[str]: + return [n["@type"] for n in + filter(lambda x: isinstance(x, dict), iterator)] + + def testIteratorPreOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.PRE_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['root', 'son1', 'son1_1', 'son1_2', 'son2', 'son2_1', 'son2_2']) - def 
testIteratorPostOrder(self): + def testIteratorPostOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.POST_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1', 'son2_2', 'son2', 'root']) - def testIteratorLevelOrder(self): + def testIteratorLevelOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.LEVEL_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['root', 'son1', 'son2', 'son1_1', 'son1_2', 'son2_1', 'son2_2']) - def testIteratorPositionOrder(self): + def testIteratorPositionOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.POSITION_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1', 'son1_2', 'son2_2', 'son2']) - def _validate_resp(self, resp): - self.assertIsNotNone(resp) - self.assertEqual(type(resp).DESCRIPTOR.full_name, - ParseResponse.DESCRIPTOR.full_name) - self.assertEqual(len(resp.errors), 0) - # self.assertIsInstance() does not work - must be some metaclass magic - # self.assertIsInstance(resp.uast, Node) - - # Sometimes its fully qualified, sometimes is just "Node"... ditto - self.assertTrue(resp.uast.__class__.__name__.endswith('Node')) - - def testFilterInsideIter(self): - root = self.client.parse(__file__).uast - it = iterator(root, TreeOrder.PRE_ORDER) - self.assertIsNotNone(it) - for n in it: - filter(n, "//*[@roleIdentifier]") - - def testItersMixingIterations(self): - root = self.client.parse(__file__).uast - it = iterator(root, TreeOrder.PRE_ORDER) + def _validate_ctx(self, ctx: ResultContext) -> None: + import bblfsh + self.assertIsNotNone(ctx) + self.assertIsInstance(ctx, bblfsh.result_context.ResultContext) + self.assertIsInstance(ctx.uast, bytes) + + def testFilterInsideIter(self) -> None: + ctx = self._parse_fixture() + c2 = uast() + for n in ctx.iterate(TreeOrder.PRE_ORDER): + c2.filter("//uast:Positions", n) + + def testItersMixingIterations(self) -> None: + ctx = self._parse_fixture() + it = ctx.iterate(TreeOrder.PRE_ORDER) next(it); next(it); next(it) + n = next(it) - it2 = iterator(n, TreeOrder.PRE_ORDER) + it2 = n.iterate(TreeOrder.PRE_ORDER) next(it2) - assert(next(it) == next(it2)) + a = next(it).get() + b = next(it2).get() + self.assertListEqual(a, b) - def testManyFilters(self): - root = self.client.parse(__file__).uast - root.properties['k1'] = 'v2' - root.properties['k2'] = 'v1' + # XXX uncomment + # def testManyFilters(self) -> None: + # ctx = self._parse_fixture() - before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(500): - filter(root, "//*[@roleIdentifier]") + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(500): + # ctx.filter("//*[@role='Identifier']") - after = resource.getrusage(resource.RUSAGE_SELF) + # after = resource.getrusage(resource.RUSAGE_SELF) - # Check that memory usage has not doubled after running the filter - self.assertLess(after[2] / before[2], 2.0) + # # Check that memory usage has not doubled + # self.assertLess(after[2] / before[2], 2.0) - def testManyParses(self): - before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(100): - root = self.client.parse(__file__).uast - root.properties['k1'] = 'v2' - root.properties['k2'] = 'v1' + # def 
testManyParses(self) -> None: + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(100): + # self._parse_fixture() - after = resource.getrusage(resource.RUSAGE_SELF) + # after = resource.getrusage(resource.RUSAGE_SELF) - # Check that memory usage has not doubled after running the parse+filter - self.assertLess(after[2] / before[2], 2.0) + # # Check that memory usage has not doubled + # self.assertLess(after[2] / before[2], 2.0) - def testManyParsersAndFilters(self): - before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(100): - root = self.client.parse(__file__).uast - root.properties['k1'] = 'v2' - root.properties['k2'] = 'v1' + # def testManyParsersAndFilters(self) -> None: + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(100): + # ctx = self.client.parse(self.fixtures_file) + # ctx.filter("//*[@role='Identifier']") - filter(root, "//*[@roleIdentifier]") + # after = resource.getrusage(resource.RUSAGE_SELF) - after = resource.getrusage(resource.RUSAGE_SELF) + # # Check that memory usage has not doubled + # self.assertLess(after[2] / before[2], 2.0) - # Check that memory usage has not doubled after running the parse+filter - self.assertLess(after[2] / before[2], 2.0) - - def testSupportedLanguages(self): + def testSupportedLanguages(self) -> None: res = self.client.supported_languages() self.assertGreater(len(res), 0) for l in res: for key in ('language', 'version', 'status', 'features'): - print(key) self.assertTrue(hasattr(l, key)) self.assertIsNotNone(getattr(l, key)) - def _validate_filter(self, resp): - results = filter(resp.uast, "//Num") - self.assertIsInstance(resp.uast, Node) - self.assertEqual(next(results).token, "0") - self.assertEqual(next(results).token, "1") - self.assertEqual(next(results).token, "100") - self.assertEqual(next(results).token, "10") - if __name__ == "__main__": unittest.main() diff --git a/bblfsh/tree_order.py b/bblfsh/tree_order.py new file mode 100644 index 0000000..e02259b --- /dev/null +++ b/bblfsh/tree_order.py @@ -0,0 +1,15 @@ +from enum import IntEnum + + +class TreeOrder(IntEnum): + _MIN = 0 + PRE_ORDER = 0 + POST_ORDER = 1 + LEVEL_ORDER = 2 + POSITION_ORDER = 3 + _MAX = 3 + + @staticmethod + def check_order(order: int) -> None: + if order < TreeOrder._MIN or order > TreeOrder._MAX: + raise Exception("Wrong order value") diff --git a/setup.py b/setup.py index 40d2147..98b9123 100644 --- a/setup.py +++ b/setup.py @@ -12,37 +12,29 @@ from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext -VERSION = "2.12.7" -LIBUAST_VERSION = "v1.9.5" -SDK_VERSION = "v1.16.1" -SDK_MAJOR = SDK_VERSION.split('.')[0] +VERSION = "3.0.0" +LIBUAST_VERSION = "v3.1.0" +LIBUAST_ARCH = "linux-amd64" +SDK_V1_VERSION = "v1.16.1" +SDK_V1_MAJOR = SDK_V1_VERSION.split('.')[0] +SDK_V2_VERSION = "v2.12.0" +SDK_V2_MAJOR = SDK_V2_VERSION.split('.')[0] + FORMAT_ARGS = globals() +sources = ["bblfsh/pyuast.cc"] +log = logging.getLogger("setup.py") + # For debugging libuast-client interactions, set to True in production! 
GET_LIBUAST = True if not GET_LIBUAST: - print("WARNING: not retrieving libuast, using local version") - -if os.getenv("CC") is None: - os.environ["CC"] = "g++" # yes, g++ - otherwise distutils will use gcc -std=c++11 and explode -if os.getenv("CXX") is None: - os.environ["CXX"] = "g++" -libraries = ['xml2'] -sources = ["bblfsh/pyuast.cc", "bblfsh/memtracker.cc"] -log = logging.getLogger("setup.py") + log.warning("WARNING: not retrieving libuast, using local version") class CustomBuildExt(build_ext): def run(self): - global libraries global sources - if "--global-uast" in sys.argv: - libraries.append("uast") - else: - sources.append("bblfsh/libuast/uast.cc") - sources.append("bblfsh/libuast/roles.c") - get_libuast() build_ext.run(self) @@ -51,6 +43,14 @@ def j(*paths): return os.path.join(*paths) +def runorexit(cmd, errmsg=""): + log.info(">>", cmd) + if os.system(cmd) != 0: + sep = ". " if errmsg else "" + log.error(errmsg + sep + "Failed command: '%s'" % cmd) + sys.exit(1) + + def mkdir(path): path = path.format(**FORMAT_ARGS) log.info("mkdir -p " + path) @@ -66,7 +66,7 @@ def rimraf(path): def mv(src, dst): src = src.format(**FORMAT_ARGS) dst = dst.format(**FORMAT_ARGS) - log.info("mv %s %s", src, dst) + log.info(">> mv %s %s", src, dst) shutil.rmtree(dst, ignore_errors=True) os.rename(src, dst) @@ -74,7 +74,7 @@ def mv(src, dst): def cp(src, dst): src = src.format(**FORMAT_ARGS) dst = dst.format(**FORMAT_ARGS) - log.info("cp -p %s %s", src, dst) + log.info(">> cp %s %s", src, dst) shutil.rmtree(dst, ignore_errors=True) shutil.copy2(src, dst) @@ -82,16 +82,17 @@ def cp(src, dst): def cpr(src, dst): src = src.format(**FORMAT_ARGS) dst = dst.format(**FORMAT_ARGS) - log.info("cp -pr %s %s", src, dst) + log.info(">> cp -pr %s %s", src, dst) if os.path.isdir(dst): shutil.rmtree(dst) shutil.copytree(src, dst, symlinks=True) def untar_url(url, path="."): - log.info("tar xf " + url) + log.info(">> tar xf " + url) with urlopen(url) as response: - response.tell = lambda: 0 # tarfile calls it only once in the beginning + # tarfile calls it only once in the beginning + response.tell = lambda: 0 with tarfile.open(fileobj=response, mode=("r:" + url.rsplit(".", 1)[-1])) as tar: tar.extractall(path=path) @@ -101,15 +102,15 @@ def call(*cmd): subprocess.check_call(cmd) -def create_dirs(): - mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_MAJOR}", "protocol")) - mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_MAJOR}", "uast")) - mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "protocol")) - mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "uast")) +def create_dirs(sdk_major): + mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.%s" % sdk_major, "protocol")) + mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.%s" % sdk_major, "uast")) + mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "protocol")) + mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "uast")) mkdir(j("bblfsh", "github", "com", "gogo", "protobuf", "gogoproto")) -def create_inits(): +def create_inits(sdk_major): init_files = [ j("bblfsh", "github", "__init__.py"), j("bblfsh", "github", "com", "__init__.py"), @@ -120,9 +121,9 @@ def create_inits(): j("bblfsh", "gopkg", "in", "__init__.py"), j("bblfsh", "gopkg", "in", "bblfsh", "__init__.py"), j("bblfsh", "gopkg", "in", "bblfsh", "sdk", "__init__.py"), - j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "__init__.py"), - j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "uast", "__init__.py"), - j("bblfsh", "gopkg", "in", "bblfsh", "sdk", 
SDK_MAJOR, "protocol", "__init__.py"), + j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "__init__.py"), + j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "uast", "__init__.py"), + j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "protocol", "__init__.py"), ] for f in init_files: @@ -133,20 +134,44 @@ def get_libuast(): if not GET_LIBUAST: return - untar_url( - "https://github.com/bblfsh/libuast/archive/{LIBUAST_VERSION}/{LIBUAST_VERSION}.tar.gz" - .format(**FORMAT_ARGS)) - mv("libuast-" + LIBUAST_VERSION.replace("v", ""), "libuast") - cpr(j("libuast", "src"), j("bblfsh", "libuast")) - rimraf("libuast") + gopath = os.environ.get("GOPATH") + if not gopath: + gopath = subprocess.check_output( + ['go', 'env', 'GOPATH']).decode("utf-8").strip() + if not gopath: + log.error("GOPATH must be set") + sys.exit(1) + + py_dir = os.getcwd() + local_libuast = j(py_dir, "bblfsh", "libuast") + mkdir(local_libuast) + + # Retrieve libuast + untar_url("https://github.com/bblfsh/libuast/releases/download/%s/libuast-%s.tar.gz" % (LIBUAST_VERSION, LIBUAST_ARCH)) + mv(LIBUAST_ARCH, local_libuast) + + +def proto_download_v1(): + url = "https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_V1_VERSION + untar_url(url) + sdkdir = "sdk-" + SDK_V1_VERSION[1:] + destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_V1_MAJOR}") + cp(j(sdkdir, "protocol", "generated.proto"), + j(destdir, "protocol", "generated.proto")) + cp(j(sdkdir, "uast", "generated.proto"), + j(destdir, "uast", "generated.proto")) + rimraf(sdkdir) -def proto_download(): - untar_url("https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_VERSION) - sdkdir = "sdk-" + SDK_VERSION[1:] - destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_MAJOR}") - cp(j(sdkdir, "protocol", "generated.proto"), j(destdir, "protocol", "generated.proto")) - cp(j(sdkdir, "uast", "generated.proto"), j(destdir, "uast", "generated.proto")) +def proto_download_v2(): + untar_url("https://github.com/bblfsh/sdk/archive/%s.tar.gz" + % SDK_V2_VERSION) + sdkdir = "sdk-" + SDK_V2_VERSION[1:] + destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_V2_MAJOR}") + cp(j(sdkdir, "protocol", "driver.proto"), + j(destdir, "protocol", "generated.proto")) + cp(j(sdkdir, "uast", "role", "generated.proto"), + j(destdir, "uast", "generated.proto")) rimraf(sdkdir) @@ -177,23 +202,33 @@ def patch(file, *patchers): def protoc(proto_file, grpc=False): main_args = [protoc_module.__file__, "--python_out=bblfsh"] target_dir = j("bblfsh", *os.path.dirname(proto_file).split(".")) + if grpc: # using "." 
creates "gopkg.in" instead of "gopkg/in" directories main_args += ["--grpc_python_out=" + target_dir] + main_args += ["-Iproto", sysinclude, j("proto", proto_file)] - log.info("%s -m grpc.tools.protoc " + " ".join(main_args[1:]), sys.executable) + log.info("%s -m grpc.tools.protoc " + + " ".join(main_args[1:]), sys.executable) protoc_module.main(main_args) + if grpc: # we need to move the file back to grpc_out grpc_garbage_dir = None target = j(target_dir, "generated_pb2_grpc.py") + for root, dirnames, filenames in os.walk(target_dir): for filename in filenames: - if filename == "generated_pb2_grpc.py" and grpc_garbage_dir is not None: + + if filename == "generated_pb2_grpc.py" and\ + grpc_garbage_dir is not None: mv(j(root, filename), target) + if os.path.samefile(root, target_dir): grpc_garbage_dir = j(root, dirnames[0]) - rimraf(grpc_garbage_dir) + + if grpc_garbage_dir: + rimraf(grpc_garbage_dir) # grpc ignores "in" and we need to patch the import path def grpc_replacer(match): @@ -215,20 +250,31 @@ def importlib_import_replacer(match): (from_import_re, from_import_replacer), (importlib_import_re, importlib_import_replacer)) - protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_MAJOR, "protocol", "generated.proto"), True) protoc(j("github.com", "gogo", "protobuf", "gogoproto", "gogo.proto")) - protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_MAJOR, "uast", "generated.proto")) + + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V1_MAJOR, "protocol", "generated.proto"), True) + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V1_MAJOR, "uast", "generated.proto")) + + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V2_MAJOR, "uast", "generated.proto")) + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V2_MAJOR, "protocol", "generated.proto"), True) def do_get_deps(): get_libuast() - create_dirs() - create_inits() - proto_download() + + create_dirs(SDK_V1_MAJOR) + create_dirs(SDK_V2_MAJOR) + + create_inits(SDK_V1_MAJOR) + create_inits(SDK_V2_MAJOR) + + proto_download_v1() + proto_download_v2() proto_compile() def clean(): + rimraf("build") rimraf("gopkg.in") rimraf(j("bblfsh", "github")) rimraf(j("bblfsh", "gopkg")) @@ -237,9 +283,12 @@ def clean(): def main(): - # The --global-uast flag allows to install the python driver using the installed uast library + # The --global-uast flag allows to install the python driver + # using the installed uast library if "--log" in sys.argv: logging.basicConfig(level=logging.INFO) + else: + logging.basicConfig(level=logging.ERROR) if "--getdeps" in sys.argv: do_get_deps() @@ -249,16 +298,27 @@ def main(): clean() sys.exit() + libraries = [] + static_lib_dir = j("bblfsh", "libuast") + static_libraries = ["{}/libuast".format(static_lib_dir)] + + if sys.platform == 'win32': + libraries.extend(static_libraries) + libraries.extend(["legacy_stdio_definitions", "winmm", "ws2_32"]) + extra_objects = [] + else: # POSIX + extra_objects = ['{}.a'.format(l) for l in static_libraries] + libuast_module = Extension( "bblfsh.pyuast", libraries=libraries, - library_dirs=["/usr/lib", "/usr/local/lib"], extra_compile_args=["-std=c++11"], - include_dirs=[j("bblfsh", "libuast"), "/usr/local/include", "/usr/local/include/libxml2", - "/usr/include", "/usr/include/libxml2"], sources=sources) + extra_objects=extra_objects, + include_dirs=[j("bblfsh", "libuast")], + sources=sources) setup( - cmdclass = { + cmdclass={ "build_ext": CustomBuildExt, }, name="bblfsh", @@ -272,7 +332,8 @@ def main(): packages=find_packages(), exclude=["bblfsh/test.py"], keywords=["babelfish", "uast"], - 
install_requires=["grpcio>=1.13.0,<2.0", "grpcio-tools>=1.13.0,<2.0", "docker", "protobuf>=3.4.0"], + install_requires=["grpcio>=1.13.0", "grpcio-tools>=1.13.0", + "docker", "protobuf>=3.4.0"], package_data={"": ["LICENSE", "README.md"]}, ext_modules=[libuast_module], classifiers=[ @@ -286,7 +347,8 @@ def main(): "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Topic :: Software Development :: Libraries" - ] + ], + zip_safe=False, )
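A minimal usage sketch of the `ResultContext`, `NodeIterator` and `Node` wrappers added in `bblfsh/result_context.py`, assuming a running bblfshd on 0.0.0.0:9432 with a Python driver installed and the `fixtures/test.py` fixture used by `bblfsh/test.py`:

```python
# Minimal sketch (assumptions: bblfshd running locally, a Python driver
# installed, and fixtures/test.py present as in bblfsh/test.py).
from bblfsh import BblfshClient, TreeOrder

client = BblfshClient("0.0.0.0:9432")
ctx = client.parse("fixtures/test.py")        # -> ResultContext

print(ctx.language)                           # "python"
tree = ctx.get_all()                          # whole UAST decoded into a dict

# filter() yields lazy Node wrappers over external UAST nodes
for node in ctx.filter("//*[@role='Identifier']"):
    print(node.get())

# typed getters (get_int/get_str/...) raise ResultTypeException on a mismatch
n_pos = next(ctx.filter("count(//uast:Positions)")).get_int()

# explicit iteration orders are validated by TreeOrder.check_order
for node in ctx.iterate(TreeOrder.POST_ORDER):
    pass
```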
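The same flow can be driven directly through the rewritten `bblfsh.pyuast` extension, which is what `ResultContext` does internally. A rough sketch under the same assumptions, exercising only the calls that appear in `bblfsh/result_context.py` and `bblfsh/test.py`:

```python
# Low-level sketch of the pyuast surface wired up in this diff:
# decode() -> external context, uast() -> pure-Python context, iterator().
from bblfsh import BblfshClient
from bblfsh.pyuast import decode, iterator, uast
from bblfsh.tree_order import TreeOrder

data = BblfshClient("0.0.0.0:9432").parse("fixtures/test.py").uast  # raw bytes

ctx = decode(data, format=0)      # binary UAST -> external context
root = ctx.root()                 # external node handle
as_dict = ctx.load()              # materialize the whole tree into Python objects

for node in ctx.filter("//uast:Positions"):
    print(node.load())            # load() converts an external node

# iterator() accepts both external nodes and plain Python objects
for _ in iterator(root, TreeOrder.LEVEL_ORDER):
    pass

# a standalone Python-side context can run XPath over plain dict UASTs
py_ctx = uast()
assert any(py_ctx.filter("/*[@k1='v1']", {"k1": "v1"}))
```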