From 9b5b502d4d26642a63fbf5116791c3327321de2e Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Thu, 4 Oct 2018 03:10:17 +0300 Subject: [PATCH 01/48] use the new libuast; rewrite bindings using cpp layer of libuast Signed-off-by: Denys Smirnov --- README.md | 3 +- bblfsh/memtracker.cc | 22 +- bblfsh/memtracker.h | 12 - bblfsh/pyuast.cc | 1099 +++++++++++++++++++++++++++++------------- setup.py | 18 +- 5 files changed, 774 insertions(+), 380 deletions(-) diff --git a/README.md b/README.md index 6004cf7..3f58e06 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,10 @@ python setup.py install ### Dependencies -You need to install `libxml2` and its header files. You also will need a `curl` cli tool to dowload `libuast`, and a `g++` for building [libtuast Python bindings](https://github.com/bblfsh/client-python/blob/0037d762563ab49b3daac8a7577f7103a5628fc6/setup.py#L17). +You also will need a `curl` cli tool to download `libuast`, and a `g++` for building [libuast Python bindings](https://github.com/bblfsh/client-python/blob/0037d762563ab49b3daac8a7577f7103a5628fc6/setup.py#L17). 
The command for Debian and derived distributions would be: ```bash -sudo apt install libxml2-dev sudo apt install curl sudo apt install build-essential ``` diff --git a/bblfsh/memtracker.cc b/bblfsh/memtracker.cc index d7322d4..d9626d5 100644 --- a/bblfsh/memtracker.cc +++ b/bblfsh/memtracker.cc @@ -1,35 +1,15 @@ #include "memtracker.h" -UastIterator* MemTracker::CurrentIterator() { return currentIter_; } -void MemTracker::ClearCurrentIterator() { currentIter_ = nullptr; } -void MemTracker::EnterFilter() { inFilter_ = true; } -void MemTracker::ExitFilter() { inFilter_ = false; } -bool MemTracker::CurrentIteratorSet() { return currentIter_ != nullptr; } -void MemTracker::SetCurrentIterator(UastIterator *iter) { currentIter_ = iter; } - void MemTracker::TrackItem(PyObject *o) { - if (inFilter_) { - filterItemAllocs_.push_back(o); - } else { - iterItemAllocs_[currentIter_].push_back(o); - } + filterItemAllocs_.push_back(o); } void MemTracker::DisposeMem() { - if (inFilter_) { for (auto &i : filterItemAllocs_) { Py_CLEAR(i); } filterItemAllocs_.clear(); filterItemAllocs_.shrink_to_fit(); - } else { - for (auto &i : iterItemAllocs_[currentIter_]) { - Py_CLEAR(i); - } - iterItemAllocs_[currentIter_].clear(); - iterItemAllocs_.erase(currentIter_); - ClearCurrentIterator(); - } } diff --git a/bblfsh/memtracker.h b/bblfsh/memtracker.h index a8994e1..7dd8d6e 100644 --- a/bblfsh/memtracker.h +++ b/bblfsh/memtracker.h @@ -1,25 +1,13 @@ #include #include -#include "uast.h" - #include class MemTracker { private: - UastIterator *currentIter_ = nullptr; - bool inFilter_ = false; - - std::unordered_map> iterItemAllocs_; std::vector filterItemAllocs_; public: - UastIterator *CurrentIterator(); - void SetCurrentIterator(UastIterator *iter); - bool CurrentIteratorSet(); - void ClearCurrentIterator(); - void EnterFilter(); - void ExitFilter(); void TrackItem(PyObject *ref); void DisposeMem(); }; diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 83072c1..be17790 100644 --- 
a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -1,272 +1,620 @@ #include #include #include +#include #include +#include -#include "uast.h" +#include "libuast.hpp" #include "memtracker.h" - +#define DEBUG_HERE PyErr_SetString(PyExc_NotImplementedError, "HERE"); return NULL; // Used to store references to the Pyobjects instanced in String() and // ItemAt() methods. Those can't be DECREF'ed to 0 because libuast uses them // so we pass ownership to these lists and free them at the end of filter() -MemTracker memTracker; -// WARNING: calls to Attribute MUST Py_DECREF the returned value once -// used (or add it to the memtracker) -static PyObject *Attribute(const void *node, const char *prop) { - PyObject *n = (PyObject *)node; - return PyObject_GetAttrString(n, prop); +PyObject* asPyBuffer(uast::Buffer buf) { + // return PyByteArray_FromStringAndSize((const char*)(data), size); + return PyMemoryView_FromMemory((char*)(buf.ptr), buf.size, PyBUF_READ); } -// WARNING: calls to AttributeValue MUST Py_DECREF the returned value once -// used (or add it to the memtracker) -static PyObject *AttributeValue(const void *node, const char *prop) { - PyObject *a = Attribute(node, prop); - return a && a != Py_None ? 
a : NULL; +/* +static bool checkError(const Uast* ctx) { + char *error = LastError((Uast*)ctx); + if (!error) { + return true; + } + PyErr_SetString(PyExc_RuntimeError, error); + free(error); + return false; } +*/ -static bool HasAttribute(const void *node, const char *prop) { - PyObject *o = AttributeValue(node, prop); - if (o == NULL) { - return false; - } +// ========================================== +// External UAST Node (managed by libuast) +// ========================================== - Py_DECREF(o); - return true; -} +class ContextExt; -static const char *String(const void *node, const char *prop) { - const char *retval = NULL; - PyObject *o = Attribute(node, prop); - if (o != NULL) { - retval = PyUnicode_AsUTF8(o); - memTracker.TrackItem(o); - } - return retval; -} +typedef struct { + PyObject_HEAD + ContextExt *ctx; + NodeHandle handle; +} NodeExt; -static size_t Size(const void *node, const char *prop) { - size_t retval = 0; - PyObject *o = Attribute(node, prop); - if (o != NULL) { - retval = PySequence_Size(o); - Py_DECREF(o); - } +static PyObject *NodeExt_load(NodeExt *self, PyObject *Py_UNUSED(ignored)); - return retval; +static PyMethodDef NodeExt_methods[] = { + {"load", (PyCFunction) NodeExt_load, METH_NOARGS, + "Load external node to Python" + }, + {NULL} // Sentinel +}; + +extern "C" +{ + static PyTypeObject NodeExtType = { + PyVarObject_HEAD_INIT(NULL, 0) + "pyuast.NodeExt", // tp_name + sizeof(NodeExt), // tp_basicsize + 0, // tp_itemsize + 0, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "External UAST node", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter: __iter()__ method + 0, // tp_iternext: next() method + 
NodeExt_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; } -static PyObject *ItemAt(PyObject *object, int index) { - PyObject *retval = NULL; - PyObject *seq = PySequence_Fast(object, "expected a sequence"); - if (seq != NULL) { - retval = PyList_GET_ITEM(seq, index); - memTracker.TrackItem(seq); - } +// ========================================== +// External UAST iterator +// ========================================== - return retval; -} +typedef struct { + PyObject_HEAD + ContextExt *ctx; + uast::Iterator *iter; + bool freeCtx; +} PyUastIterExt; -static const char *InternalType(const void *node) { - return String(node, "internal_type"); -} +static void PyUastIterExt_dealloc(PyObject *self); -static const char *Token(const void *node) { - return String(node, "token"); +static PyObject *PyUastIterExt_iter(PyObject *self) { + Py_INCREF(self); + return self; } -static size_t ChildrenSize(const void *node) { - return Size(node, "children"); -} +static PyObject *PyUastIterExt_toPy(ContextExt *ctx, NodeHandle node); + +static PyObject *PyUastIterExt_next(PyObject *self) { + auto it = (PyUastIterExt *)self; -static void *ChildAt(const void *node, int index) { - PyObject *children = AttributeValue(node, "children"); - void *retval = nullptr; - if (children) { - retval = ItemAt(children, index); - Py_DECREF(children); + // TODO: check errors + if (!it->iter->next()) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; } - return retval; -} + NodeHandle node = it->iter->node(); + if (node == 0) Py_RETURN_NONE; -static size_t RolesSize(const void *node) { - return Size(node, "roles"); + return PyUastIterExt_toPy(it->ctx, node); } -static uint16_t RoleAt(const void *node, int index) { - uint16_t retval = 0; - PyObject *roles = AttributeValue(node, "roles"); - if (roles) { - retval = 
(uint16_t)PyLong_AsUnsignedLong(ItemAt(roles, index)); - Py_DECREF(roles); - } - return retval; +extern "C" +{ + static PyTypeObject PyUastIterExtType = { + PyVarObject_HEAD_INIT(NULL, 0) + "pyuast.IteratorExt", // tp_name + sizeof(PyUastIterExt), // tp_basicsize + 0, // tp_itemsize + PyUastIterExt_dealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "External UastIterator object", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + PyUastIterExt_iter, // tp_iter: __iter()__ method + (iternextfunc)PyUastIterExt_next, // tp_iternext: next() method + 0, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; } -static size_t PropertiesSize(const void *node) { - size_t retval = 0; - PyObject *properties = AttributeValue(node, "properties"); - if (properties) { - retval = PyMapping_Size(properties); - Py_DECREF(properties); - } - return retval; +// ========================================== +// External UAST Context (managed by libuast) +// ========================================== + +class ContextExt { +private: + uast::Context *ctx; + + PyObject* toPy(NodeHandle node) { + if (node == 0) Py_RETURN_NONE; + + NodeExt *pyObj = PyObject_New(NodeExt, &NodeExtType); + if (!pyObj) return NULL; + + pyObj->ctx = this; + pyObj->handle = node; + return (PyObject*)pyObj; + } + NodeHandle toHandle(PyObject* obj) { + if (!obj || obj == Py_None) return 0; + + if (!PyObject_TypeCheck(obj, &NodeExtType)) { + const char* err = "unknown node type"; + PyErr_SetString(PyExc_NotImplementedError, err); + 
ctx->SetError(err); + return 0; + } + + auto node = (NodeExt*)obj; + return node->handle; + } + + PyObject* newIter(uast::Iterator *it, bool freeCtx){ + PyUastIterExt *pyIt = PyObject_New(PyUastIterExt, &PyUastIterExtType); + if (!pyIt) + return NULL; + + if (!PyObject_Init((PyObject *)pyIt, &PyUastIterExtType)) { + Py_DECREF(pyIt); + return NULL; + } + pyIt->iter = it; + pyIt->ctx = this; + pyIt->freeCtx = freeCtx; + return (PyObject*)pyIt; + } +public: + friend class Context; + + ContextExt(uast::Context *c) : ctx(c) { + } + ~ContextExt(){ + delete(ctx); + } + + PyObject* lookup(NodeHandle node) { + return toPy(node); + } + + PyObject* RootNode(){ + NodeHandle root = ctx->RootNode(); + return toPy(root); + } + + PyObject* Iterate(PyObject* node, TreeOrder order){ + NodeHandle h = toHandle(node); + auto iter = ctx->Iterate(h, order); + return newIter(iter, false); + } + + PyObject* Filter(PyObject* node, char* query){ + NodeHandle unode = toHandle(node); + if (unode == 0) { + unode = ctx->RootNode(); + } + + uast::Iterator *it = ctx->Filter(unode, query); + delete(query); + return newIter(it, false); + } + PyObject* Encode(PyObject *node, UastFormat format) { + uast::Buffer data = ctx->Encode(toHandle(node), format); + return asPyBuffer(data); + } +}; + +static PyObject *PyUastIterExt_toPy(ContextExt *ctx, NodeHandle node) { + return ctx->lookup(node); } -static const char *PropertyKeyAt(const void *node, int index) { - PyObject *properties = AttributeValue(node, "properties"); - if (!properties || !PyMapping_Check(properties)) { - return NULL; - } +static void PyUastIterExt_dealloc(PyObject *self) { + auto it = (PyUastIterExt *)self; + delete(it->iter); - const char *retval = NULL; - PyObject *keys = PyMapping_Keys(properties); - Py_DECREF(properties); - if (keys != NULL) { - retval = PyUnicode_AsUTF8(ItemAt(keys, index)); - Py_DECREF(keys); + if (it->freeCtx && it->ctx) { + delete(it->ctx); } - return retval; + it->freeCtx = false; + it->ctx = NULL; } -static 
const char *PropertyValueAt(const void *node, int index) { - PyObject *properties = AttributeValue(node, "properties"); - if (!properties) - return NULL; - - if (!PyMapping_Check(properties)) { - Py_DECREF(properties); - return NULL; - } +typedef struct { + PyObject_HEAD + ContextExt *p; +} PyContextExt; - const char *retval = NULL; - PyObject *values = PyMapping_Values(properties); - if (values != NULL) { - retval = PyUnicode_AsUTF8(ItemAt(values, index)); - Py_DECREF(values); - } - Py_DECREF(properties); - return retval; +static void PyContextExt_dealloc(PyObject *self) { + delete(((PyContextExt *)self)->p); } -static uint32_t PositionValue(const void* node, const char *prop, const char *field) { - PyObject *position = AttributeValue(node, prop); - if (!position) { - return 0; - } +static PyObject *PyContextExt_root(PyContextExt *self, PyObject *Py_UNUSED(ignored)) { + return self->p->RootNode(); +} - PyObject *offset = AttributeValue(position, field); - Py_DECREF(position); - uint32_t retval = 0; +static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args) { + PyObject *node = NULL; + char *query = NULL; + if (!PyArg_ParseTuple(args, "Os", &node, &query)) + return NULL; + return self->p->Filter(node, query); +} - if (offset) { - retval = (uint32_t)PyLong_AsUnsignedLong(offset); - Py_DECREF(offset); - } - return retval; +static PyObject *PyContextExt_encode(PyContextExt *self, PyObject *args) { + PyObject *node = NULL; + UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum + if (!PyArg_ParseTuple(args, "Oi", &node, &format)) + return NULL; + return self->p->Encode(node, format); } -///////////////////////////////////// -/////////// Node Interface ////////// -///////////////////////////////////// +static PyMethodDef PyContextExt_methods[] = { + {"root", (PyCFunction) PyContextExt_root, METH_NOARGS, + "Return the root node attached to this query context" + }, + {"filter", (PyCFunction) PyContextExt_filter, METH_VARARGS, + "Filter a 
provided UAST with XPath" + }, + {"encode", (PyCFunction) PyContextExt_encode, METH_VARARGS, + "Encodes a UAST into a buffer" + }, + {NULL} // Sentinel +}; extern "C" { - static bool HasStartOffset(const void *node) { - return HasAttribute(node, "start_position"); - } - - static uint32_t StartOffset(const void *node) { - return PositionValue(node, "start_position", "offset"); - } - - static bool HasStartLine(const void *node) { - return HasAttribute(node, "start_position"); - } - - static uint32_t StartLine(const void *node) { - return PositionValue(node, "start_position", "line"); - } - - static bool HasStartCol(const void *node) { - return HasAttribute(node, "start_position"); - } - - static uint32_t StartCol(const void *node) { - return PositionValue(node, "start_position", "col"); - } - - static bool HasEndOffset(const void *node) { - return HasAttribute(node, "end_position"); - } - - static uint32_t EndOffset(const void *node) { - return PositionValue(node, "end_position", "offset"); - } - - static bool HasEndLine(const void *node) { - return HasAttribute(node, "end_position"); - } + static PyTypeObject PyContextExtType = { + PyVarObject_HEAD_INIT(NULL, 0) + "pyuast.ContextExt", // tp_name + sizeof(PyContextExt), // tp_basicsize + 0, // tp_itemsize + PyContextExt_dealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "Internal ContextExt object", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter: __iter()__ method + 0, // tp_iternext: next() method + PyContextExt_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + 
PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; +} - static uint32_t EndLine(const void *node) { - return PositionValue(node, "end_position", "line"); - } +// ================================================ +// Python UAST Node interface (called from libuast) +// ================================================ + +class Interface; + +class Node : public uast::Node { +private: + Interface* ctx; + PyObject* obj; + NodeKind kind; + + PyObject* keys; + + static void checkPyException() { + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + if (value == NULL || value == Py_None) { + return; + } + PyObject* str = PyObject_Str(value); + throw std::runtime_error(PyUnicode_AsUTF8(str)); + } + static NodeKind kindOf(PyObject* obj) { + if (!obj || obj == Py_None) { + return NODE_NULL; + } else if (PyUnicode_Check(obj)) { + return NODE_STRING; + } else if (PyLong_Check(obj)) { + return NODE_INT; + } else if (PyFloat_Check(obj)) { + return NODE_FLOAT; + } else if (PyBool_Check(obj)) { + return NODE_BOOL; + } else if (PyList_Check(obj)) { + return NODE_ARRAY; + } + return NODE_OBJECT; + } + Node* lookupOrCreate(PyObject* obj); +public: + friend class Interface; + friend class Context; + + Node(Interface* c, NodeKind k, PyObject* v) { + ctx = c; + obj = v; + kind = k; + } + Node(Interface* c, PyObject* v) { + Node(c, kindOf(v), v); + } + + PyObject* toPy(); + + NodeKind Kind() { + return kind; + } + const char* AsString() { + const char* v = PyUnicode_AsUTF8(obj); + return v; + } + int64_t AsInt() { + long long v = PyLong_AsLongLong(obj); + return (int64_t)(v); + } + uint64_t AsUint() { + unsigned long long v = PyLong_AsUnsignedLongLong(obj); + return (uint64_t)(v); + } + double AsFloat() { + double v = PyFloat_AsDouble(obj); + return (double)(v); + } + bool AsBool() { + return obj == Py_True; + } + + size_t Size() { + size_t sz = 0; + if (PyList_Check(obj)) { + sz = (size_t)(PyList_Size(obj)); + } else { + sz = (size_t)(PyObject_Size(obj)); + if 
(int64_t(sz) == -1) { + checkPyException(); + return 0; // error + } + } + assert(int64_t(sz) >= 0); + return sz; + } + + const char* KeyAt(size_t i) { + if (obj == Py_None) { + return NULL; + } + if (!keys) keys = PyDict_Keys(obj); + PyObject* key = PyList_GetItem(keys, i); + return PyUnicode_AsUTF8(key); + } + Node* ValueAt(size_t i) { + if (obj == Py_None) { + return 0; + } + if (PyList_Check(obj)) { + PyObject* v = PyList_GetItem(obj, i); + return lookupOrCreate(v); + } + if (!keys) keys = PyDict_Keys(obj); + PyObject* key = PyList_GetItem(keys, i); + PyObject* val = PyDict_GetItem(obj, key); + Py_DECREF(key); + + return lookupOrCreate(val); + } + + void SetValue(size_t i, Node* val) { + PyObject* v = Py_None; + if (val && val->obj) v = val->obj; + // TODO: increase ref + PyList_SetItem(obj, i, v); + } + void SetKeyValue(const char* k, Node* val) { + PyObject* v = Py_None; + if (val && val->obj) v = val->obj; + // TODO: increase ref + PyDict_SetItemString(obj, k, v); + } +}; - static bool HasEndCol(const void *node) { - return HasAttribute(node, "end_position"); - } +// =========================================== +// Python UAST interface (called from libuast) +// =========================================== + +class Context; + +class Interface : public uast::NodeCreator { +private: + // TODO: track objects + std::map obj2node; + + static PyObject* newBool(bool v) { + if (v) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } + Node* lookupOrCreate(PyObject* obj) { + Node* node = obj2node[obj]; + if (node) return node; + + node = new Node(this, obj); + obj2node[obj] = node; + return node; + } + Node* create(NodeKind kind, PyObject* obj) { + Node* node = new Node(this, kind, obj); + obj2node[obj] = node; + return node; + } +public: + friend class Node; + friend class Context; + + Interface(){ + } + ~Interface(){ + // TODO: dealloc for Nodes and DECREF for PyObjects + } + Node* toNode(PyObject* obj){ + return lookupOrCreate(obj); + } + PyObject* toPy(Node* node) { + if 
(node == NULL) Py_RETURN_NONE; + return node->obj; // TODO incref? + } + Node* NewObject(size_t size) { + PyObject* m = PyDict_New(); + return create(NODE_OBJECT, m); + } + Node* NewArray(size_t size) { + PyObject* arr = PyList_New(size); + return create(NODE_ARRAY, arr); + } + Node* NewString(const char* v) { + PyObject* obj = PyUnicode_FromString(v); + return create(NODE_STRING, obj); + } + Node* NewInt(int64_t v) { + PyObject* obj = PyLong_FromLongLong(v); + return create(NODE_INT, obj); + } + Node* NewUint(uint64_t v) { + PyObject* obj = PyLong_FromUnsignedLongLong(v); + return create(NODE_UINT, obj); + } + Node* NewFloat(double v) { + PyObject* obj = PyFloat_FromDouble(v); + return create(NODE_FLOAT, obj); + } + Node* NewBool(bool v) { + PyObject* obj = newBool(v); + return create(NODE_BOOL, obj); + } +}; - static uint32_t EndCol(const void *node) { - return PositionValue(node, "end_position", "col"); - } +PyObject* Node::toPy() { + return ctx->toPy(this); } -static Uast *ctx; +Node* Node::lookupOrCreate(PyObject* obj) { + return ctx->lookupOrCreate(obj); +} + +// ========================================== +// Python UAST iterator +// ========================================== -///////////////////////////////////// -/////////// PYTHON API ////////////// -///////////////////////////////////// typedef struct { PyObject_HEAD - UastIterator *iter; + Context *ctx; + uast::Iterator *iter; + bool freeCtx; } PyUastIter; -// iterator.__iter__() -static PyObject *PyUastIter_iter(PyObject *self) -{ +static void PyUastIter_dealloc(PyObject *self); + +static PyObject *PyUastIter_iter(PyObject *self) { Py_INCREF(self); return self; } -// iterator.__next__() -static PyObject *PyUastIter_next(PyObject *self) -{ - - PyUastIter *it = (PyUastIter *)self; +static PyObject *PyUastIter_next(PyObject *self) { + auto it = (PyUastIter *)self; - void *node = UastIteratorNext(it->iter); - if (!node) { + // TODO: check errors + if (!it->iter->next()) { PyErr_SetNone(PyExc_StopIteration); 
return NULL; } - Py_INCREF(node); - memTracker.SetCurrentIterator(it->iter); - return (PyObject *)node; -} + Node* node = it->iter->node(); + if (!node) Py_RETURN_NONE; -// Forward declaration for the Type ref -static PyObject *PyUastIter_new(PyObject *self, PyObject *args); -static void PyUastIter_dealloc(PyObject *self); + return node->toPy(); +} extern "C" { static PyTypeObject PyUastIterType = { PyVarObject_HEAD_INIT(NULL, 0) - "pyuast.UastIterator", // tp_name + "pyuast.Iterator", // tp_name sizeof(PyUastIter), // tp_basicsize 0, // tp_itemsize PyUastIter_dealloc, // tp_dealloc @@ -306,159 +654,258 @@ extern "C" }; } -static PyObject *PyUastIter_new(PyObject *self, PyObject *args) -{ - void *node = NULL; - uint8_t order; - - if (!PyArg_ParseTuple(args, "OB", &node, &order)) - return NULL; +// ========================================== +// Python UAST Context object +// ========================================== + +class Context { +private: + Interface *iface; + uast::PtrInterface *impl; + uast::Context *ctx; + + PyObject* toPy(Node* node) { + if (node == NULL) Py_RETURN_NONE; + return iface->toPy(node); + } + Node* toNode(PyObject* obj) { + if (!obj || obj == Py_None) return NULL; + + return iface->lookupOrCreate(obj); + } + PyObject* newIter(uast::Iterator *it, bool freeCtx){ + PyUastIter *pyIt = PyObject_New(PyUastIter, &PyUastIterType); + if (!pyIt) + return NULL; + + if (!PyObject_Init((PyObject *)pyIt, &PyUastIterType)) { + Py_DECREF(pyIt); + return NULL; + } + pyIt->iter = it; + pyIt->ctx = this; + pyIt->freeCtx = freeCtx; + return (PyObject*)pyIt; + } +public: + Context(){ + // create a class that makes and tracks UAST nodes + iface = new Interface(); + // create an implementation that will handle libuast calls + auto impl = new uast::PtrInterface(iface); + // create a new UAST context based on this implementation + ctx = impl->NewContext(); + } + ~Context(){ + delete(ctx); + delete(impl); + delete(iface); + } + + PyObject* RootNode(){ + Node* root = 
ctx->RootNode(); + return toPy(root); + } + + PyObject* Iterate(PyObject* node, TreeOrder order, bool freeCtx){ + Node* unode = toNode(node); + auto iter = ctx->Iterate(unode, order); + return newIter(iter, freeCtx); + } + + PyObject* Filter(PyObject* node, char* query){ + Node* unode = toNode(node); + if (unode == NULL) { + unode = ctx->RootNode(); + } + + auto it = ctx->Filter(unode, query); + delete(query); + return newIter(it, false); + } + PyObject* Encode(PyObject *node, UastFormat format) { + uast::Buffer data = ctx->Encode(toNode(node), format); + return asPyBuffer(data); + } + PyObject* LoadFrom(NodeExt *src) { + auto sctx = src->ctx->ctx; + NodeHandle snode = src->handle; + + Node* node = uast::Load(sctx, snode, ctx); + return toPy(node); + } +}; - PyUastIter *pyIt = PyObject_New(PyUastIter, &PyUastIterType); - if (!pyIt) - return NULL; +static PyObject *NodeExt_load(NodeExt *self, PyObject *Py_UNUSED(ignored)) { + auto ctx = new Context(); + PyObject* node = ctx->LoadFrom(self); + delete(ctx); + return node; +} - if (!PyObject_Init((PyObject *)pyIt, &PyUastIterType)) { - Py_DECREF(pyIt); - return NULL; - } +static void PyUastIter_dealloc(PyObject *self) { + auto it = (PyUastIter *)self; + delete(it->iter); - pyIt->iter = UastIteratorNew(ctx, node, (TreeOrder)order); - if (!pyIt->iter) { - Py_DECREF(pyIt); - return NULL; + if (it->freeCtx && it->ctx) { + delete(it->ctx); } - - memTracker.ClearCurrentIterator(); - memTracker.SetCurrentIterator(pyIt->iter); - return (PyObject*)pyIt; + it->freeCtx = false; + it->ctx = NULL; } +typedef struct { + PyObject_HEAD + Context *p; +} PyUast; -static void PyUastIter_dealloc(PyObject *self) -{ - memTracker.DisposeMem(); - UastIteratorFree(((PyUastIter *)self)->iter); +static void PyUast_dealloc(PyObject *self) { + delete(((PyUast *)self)->p); } -static bool initFilter(PyObject *args, PyObject **obj, const char **query) -{ - if (!PyArg_ParseTuple(args, "Os", obj, query)) { - return false; - } - - 
memTracker.EnterFilter(); - return true; +static PyObject *PyUast_root(PyUast *self, PyObject *Py_UNUSED(ignored)) { + return self->p->RootNode(); } -static void cleanupFilter(void) -{ - - memTracker.DisposeMem(); - memTracker.ExitFilter(); +static PyObject *PyUast_filter(PyContextExt *self, PyObject *args) { + PyObject *node = NULL; + char *query = NULL; + if (!PyArg_ParseTuple(args, "Os", &node, &query)) + return NULL; + return self->p->Filter(node, query); } -static void filterError(void) -{ - char *error = LastError(); - PyErr_SetString(PyExc_RuntimeError, error); - free(error); - cleanupFilter(); +static PyObject *PyUast_encode(PyContextExt *self, PyObject *args) { + PyObject *node = NULL; + UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum + if (!PyArg_ParseTuple(args, "Oi", &node, &format)) + return NULL; + return self->p->Encode(node, format); } -static PyObject *PyFilter(PyObject *self, PyObject *args) -{ - PyObject *obj = NULL; - const char *query = NULL; - - if (!initFilter(args, &obj, &query)) { - return NULL; - } - - Nodes *nodes = UastFilter(ctx, obj, query); - if (!nodes) { - filterError(); - cleanupFilter(); - return NULL; - } - - size_t len = NodesSize(nodes); - PyObject *list = PyList_New(len); - - for (size_t i = 0; i < len; i++) { - PyObject *node = (PyObject *)NodeAt(nodes, i); - Py_INCREF(node); - PyList_SET_ITEM(list, i, node); - } - NodesFree(nodes); - PyObject *iter = PySeqIter_New(list); - Py_DECREF(list); +static PyMethodDef PyUast_methods[] = { + {"root", (PyCFunction) PyUast_root, METH_NOARGS, + "Return the root node attached to this query context" + }, + {"filter", (PyCFunction) PyUast_filter, METH_VARARGS, + "Filter a provided UAST with XPath" + }, + {"encode", (PyCFunction) PyUast_encode, METH_VARARGS, + "Encodes a UAST into a buffer" + }, + {NULL} // Sentinel +}; - cleanupFilter(); - return iter; +extern "C" +{ + static PyTypeObject PyUastType = { + PyVarObject_HEAD_INIT(NULL, 0) + "pyuast.Context", // tp_name + 
sizeof(PyUast), // tp_basicsize + 0, // tp_itemsize + PyUast_dealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "Internal Context object", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter: __iter()__ method + 0, // tp_iternext: next() method + PyUast_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new + }; } -static PyObject *PyFilterBool(PyObject *self, PyObject *args) -{ +// ========================================== +// Global functions +// ========================================== + +static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { PyObject *obj = NULL; - const char *query = NULL; + uint8_t order; - if (!initFilter(args, &obj, &query)) + if (!PyArg_ParseTuple(args, "OB", &obj, &order)) return NULL; - bool ok; - bool res = UastFilterBool(ctx, obj, query, &ok); - if (!ok) { - filterError(); - return NULL; + // the node can either be external or any other Python object + if (PyObject_TypeCheck(obj, &NodeExtType)) { + // external node -> external iterator + auto node = (NodeExt*)obj; + return node->ctx->Iterate(obj, (TreeOrder)order); } - - cleanupFilter(); - return res ? 
Py_True : Py_False; + // Python object -> create a new context and attach it to an iterator + Context* ctx = new Context(); + return ctx->Iterate(obj, (TreeOrder)order, true); } -static PyObject *PyFilterNumber(PyObject *self, PyObject *args) -{ - PyObject *obj = NULL; - const char *query = NULL; +static PyObject *PyUastDecode(PyObject *self, PyObject *args) { + PyObject *obj = NULL; + UastFormat format = UAST_BINARY; // TODO: make it a kwarg - if (!initFilter(args, &obj, &query)) - return NULL; + if (!PyArg_ParseTuple(args, "Oi", &obj, &format)) + return NULL; - bool ok; - double res = UastFilterNumber(ctx, obj, query, &ok); - if (!ok) { - filterError(); - return NULL; - } + Py_buffer buf; - cleanupFilter(); - return PyFloat_FromDouble(res); -} + int res = PyObject_GetBuffer(obj, &buf, PyBUF_C_CONTIGUOUS); + if (res != 0) + return NULL; -static PyObject *PyFilterString(PyObject *self, PyObject *args) -{ - PyObject *obj = NULL; - const char *query = NULL; + uast::Buffer ubuf(buf.buf, (size_t)(buf.len)); - if (!initFilter(args, &obj, &query)) - return NULL; + uast::Context* ctx = uast::Decode(ubuf, format); + PyBuffer_Release(&buf); - const char *res = UastFilterString(ctx, obj, query); - if (res == NULL) { - filterError(); - return NULL; - } + PyContextExt *pyU = PyObject_New(PyContextExt, &PyContextExtType); + if (!pyU) { + delete(ctx); + return NULL; + } + pyU->p = new ContextExt(ctx); + return (PyObject*)pyU; +} - cleanupFilter(); - return PyUnicode_FromString(res); +static PyObject *PyUast_new(PyObject *self, PyObject *args) { + if (!PyArg_ParseTuple(args, "")) { + return NULL; + } + + PyUast *pyU = PyObject_New(PyUast, &PyUastType); + if (!pyU) { + return NULL; + } + pyU->p = new Context(); + return (PyObject*)pyU; } + static PyMethodDef extension_methods[] = { - {"filter", PyFilter, METH_VARARGS, "Filter nodes in the UAST using the given query"}, {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, - {"filter_bool", PyFilterBool, 
METH_VARARGS, "For queries returning boolean values"}, - {"filter_number", PyFilterNumber, METH_VARARGS, "For queries returning boolean values"}, - {"filter_string", PyFilterString, METH_VARARGS, "For queries returning boolean values"}, + {"decode", PyUastDecode, METH_VARARGS, "Decode UAST from a byte array"}, + {"uast", PyUast_new, METH_VARARGS, "Creates a new UAST context"}, {NULL, NULL, 0, NULL} }; @@ -477,29 +924,11 @@ static struct PyModuleDef module_def = { PyMODINIT_FUNC PyInit_pyuast(void) { - NodeIface iface; - iface.InternalType = InternalType; - iface.Token = Token; - iface.ChildrenSize = ChildrenSize; - iface.ChildAt = ChildAt; - iface.RolesSize = RolesSize; - iface.RoleAt = RoleAt; - iface.PropertiesSize = PropertiesSize; - iface.PropertyKeyAt = PropertyKeyAt; - iface.PropertyValueAt = PropertyValueAt; - iface.HasStartOffset = HasStartOffset; - iface.StartOffset = StartOffset; - iface.HasStartLine = HasStartLine; - iface.StartLine = StartLine; - iface.HasStartCol = HasStartCol; - iface.StartCol = StartCol; - iface.HasEndOffset = HasEndOffset; - iface.EndOffset = EndOffset; - iface.HasEndLine = HasEndLine; - iface.EndLine = EndLine; - iface.HasEndCol = HasEndCol; - iface.EndCol = EndCol; - - ctx = UastNew(iface); + if (PyType_Ready(&PyContextExtType) < 0) return NULL; + if (PyType_Ready(&NodeExtType) < 0) return NULL; + if (PyType_Ready(&PyUastIterExtType) < 0) return NULL; + + if (PyType_Ready(&PyUastType) < 0) return NULL; + if (PyType_Ready(&PyUastIterType) < 0) return NULL; return PyModule_Create(&module_def); } diff --git a/setup.py b/setup.py index 27ee98e..fb469a7 100644 --- a/setup.py +++ b/setup.py @@ -12,9 +12,9 @@ from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext -VERSION = "2.12.1" -LIBUAST_VERSION = "v1.9.5" -SDK_VERSION = "v1.16.1" +VERSION = "3.0.0" +LIBUAST_VERSION = "v2.0.0-rc1" +SDK_VERSION = "v2.2.3" SDK_MAJOR = SDK_VERSION.split('.')[0] FORMAT_ARGS = globals() @@ -27,7 +27,7 @@ 
os.environ["CC"] = "g++" # yes, g++ - otherwise distutils will use gcc -std=c++11 and explode if os.getenv("CXX") is None: os.environ["CXX"] = "g++" -libraries = ['xml2'] +libraries = [] sources = ["bblfsh/pyuast.cc", "bblfsh/memtracker.cc"] log = logging.getLogger("setup.py") @@ -39,9 +39,6 @@ def run(self): if "--global-uast" in sys.argv: libraries.append("uast") - else: - sources.append("bblfsh/libuast/uast.cc") - sources.append("bblfsh/libuast/roles.c") get_libuast() build_ext.run(self) @@ -254,8 +251,8 @@ def main(): libraries=libraries, library_dirs=["/usr/lib", "/usr/local/lib"], extra_compile_args=["-std=c++11"], - include_dirs=[j("bblfsh", "libuast"), "/usr/local/include", "/usr/local/include/libxml2", - "/usr/include", "/usr/include/libxml2"], sources=sources) + include_dirs=[j("bblfsh", "libuast"), "/usr/local/include", + "/usr/include"], sources=sources) setup( cmdclass = { @@ -286,7 +283,8 @@ def main(): "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Topic :: Software Development :: Libraries" - ] + ], + zip_safe=False, ) From 2788a1a7f6bc5c6846d3999e9d30e1cc41004f5a Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Thu, 11 Oct 2018 21:11:48 +0300 Subject: [PATCH 02/48] working bindings prototype Signed-off-by: Denys Smirnov --- MANIFEST.in | 2 +- bblfsh/memtracker.cc | 15 ---- bblfsh/memtracker.h | 13 --- bblfsh/pyuast.cc | 184 ++++++++++++++++++++++++++++++++----------- setup.py | 8 +- 5 files changed, 143 insertions(+), 79 deletions(-) delete mode 100644 bblfsh/memtracker.cc delete mode 100644 bblfsh/memtracker.h diff --git a/MANIFEST.in b/MANIFEST.in index e8ec892..9747329 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,5 +7,5 @@ include Makefile include github.com/gogo/protobuf/gogoproto/gogo.proto include gopkg.in/bblfsh/sdk.v1/protocol/generated.proto include gopkg.in/bblfsh/sdk.v1/uast/generated.proto -include bblfsh/memtracker.h +include bblfsh/libuast/libuast.hpp prune bblfsh/libuast diff --git 
a/bblfsh/memtracker.cc b/bblfsh/memtracker.cc deleted file mode 100644 index d9626d5..0000000 --- a/bblfsh/memtracker.cc +++ /dev/null @@ -1,15 +0,0 @@ -#include "memtracker.h" - -void MemTracker::TrackItem(PyObject *o) -{ - filterItemAllocs_.push_back(o); -} - -void MemTracker::DisposeMem() -{ - for (auto &i : filterItemAllocs_) { - Py_CLEAR(i); - } - filterItemAllocs_.clear(); - filterItemAllocs_.shrink_to_fit(); -} diff --git a/bblfsh/memtracker.h b/bblfsh/memtracker.h deleted file mode 100644 index 7dd8d6e..0000000 --- a/bblfsh/memtracker.h +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -#include - -class MemTracker { -private: - std::vector filterItemAllocs_; - -public: - void TrackItem(PyObject *ref); - void DisposeMem(); -}; diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index be17790..7b2e18f 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -9,15 +9,13 @@ #include "libuast.hpp" #include "memtracker.h" -#define DEBUG_HERE PyErr_SetString(PyExc_NotImplementedError, "HERE"); return NULL; - // Used to store references to the Pyobjects instanced in String() and // ItemAt() methods. Those can't be DECREF'ed to 0 because libuast uses them // so we pass ownership to these lists and free them at the end of filter() PyObject* asPyBuffer(uast::Buffer buf) { - // return PyByteArray_FromStringAndSize((const char*)(data), size); - return PyMemoryView_FromMemory((char*)(buf.ptr), buf.size, PyBUF_READ); + return PyByteArray_FromStringAndSize((const char*)(buf.ptr), buf.size); + //return PyMemoryView_FromMemory((char*)(buf.ptr), buf.size, PyBUF_READ); } /* @@ -184,6 +182,8 @@ class ContextExt { private: uast::Context *ctx; + // toPy allocates a new NodeExt with a specified handle. + // Returns a new reference. PyObject* toPy(NodeHandle node) { if (node == 0) Py_RETURN_NONE; @@ -194,6 +194,9 @@ class ContextExt { pyObj->handle = node; return (PyObject*)pyObj; } + + // toHandle casts an object to NodeExt and returns its handle. + // Borrows the reference. 
NodeHandle toHandle(PyObject* obj) { if (!obj || obj == Py_None) return 0; @@ -231,21 +234,29 @@ class ContextExt { delete(ctx); } + // lookup searches for a specific node handle. + // Returns a new reference. PyObject* lookup(NodeHandle node) { return toPy(node); } + // RootNode returns a root UAST node, if set. + // Returns a new reference. PyObject* RootNode(){ NodeHandle root = ctx->RootNode(); - return toPy(root); + return lookup(root); } + // Iterate iterates over an external UAST tree. + // Borrows the reference. PyObject* Iterate(PyObject* node, TreeOrder order){ NodeHandle h = toHandle(node); auto iter = ctx->Iterate(h, order); return newIter(iter, false); } + // Filter queries an external UAST. + // Borrows the reference. PyObject* Filter(PyObject* node, char* query){ NodeHandle unode = toHandle(node); if (unode == 0) { @@ -256,16 +267,22 @@ class ContextExt { delete(query); return newIter(it, false); } + + // Encode serializes the external UAST. + // Borrows the reference. PyObject* Encode(PyObject *node, UastFormat format) { uast::Buffer data = ctx->Encode(toHandle(node), format); return asPyBuffer(data); } }; +// PyUastIterExt_toPy is a function that looks up for nodes visited by iterator. +// Returns a new reference. static PyObject *PyUastIterExt_toPy(ContextExt *ctx, NodeHandle node) { return ctx->lookup(node); } +// PyUastIterExt_dealloc destroys an iterator. static void PyUastIterExt_dealloc(PyObject *self) { auto it = (PyUastIterExt *)self; delete(it->iter); @@ -284,12 +301,17 @@ typedef struct { static void PyContextExt_dealloc(PyObject *self) { delete(((PyContextExt *)self)->p); + // TODO: delete self? } +// PyContextExt_root returns a root node associated with this context. +// Returns a new reference. static PyObject *PyContextExt_root(PyContextExt *self, PyObject *Py_UNUSED(ignored)) { return self->p->RootNode(); } +// PyContextExt_filter filters UAST. +// Returns a new reference. 
static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args) { PyObject *node = NULL; char *query = NULL; @@ -298,6 +320,8 @@ static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args) { return self->p->Filter(node, query); } +// PyContextExt_filter serializes UAST. +// Returns a new reference. static PyObject *PyContextExt_encode(PyContextExt *self, PyObject *args) { PyObject *node = NULL; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum @@ -372,20 +396,32 @@ class Interface; class Node : public uast::Node { private: Interface* ctx; - PyObject* obj; + PyObject* obj; // Node owns a reference NodeKind kind; PyObject* keys; + // checkPyException checks a Python error status, and if it's set, throws an error. static void checkPyException() { PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); if (value == NULL || value == Py_None) { return; } + Py_DECREF(type); + Py_DECREF(traceback); + PyObject* str = PyObject_Str(value); - throw std::runtime_error(PyUnicode_AsUTF8(str)); + Py_DECREF(value); + + auto err = PyUnicode_AsUTF8(str); + Py_DECREF(str); + + throw std::runtime_error(err); } + + // kindOf returns a kind of a Python object. + // Borrows the reference. static NodeKind kindOf(PyObject* obj) { if (!obj || obj == Py_None) { return NODE_NULL; @@ -407,13 +443,28 @@ class Node : public uast::Node { friend class Interface; friend class Context; - Node(Interface* c, NodeKind k, PyObject* v) { + // Node creates a new node associated with a given Python object and sets the kind. + // Steals the reference. + Node(Interface* c, NodeKind k, PyObject* v) : keys(nullptr) { ctx = c; obj = v; kind = k; } - Node(Interface* c, PyObject* v) { - Node(c, kindOf(v), v); + // Node creates a new node associated with a given Python object and automatically determines the kind. + // Creates a new reference. 
+ Node(Interface* c, PyObject* v) : keys(nullptr) { + ctx = c; + obj = v; Py_INCREF(v); + kind = kindOf(v); + } + ~Node(){ + if (keys) { + Py_DECREF(keys); + keys = nullptr; + } + if (obj) { + Py_DECREF(obj); + } } PyObject* toPy(); @@ -421,7 +472,7 @@ class Node : public uast::Node { NodeKind Kind() { return kind; } - const char* AsString() { + std::string AsString() { const char* v = PyUnicode_AsUTF8(obj); return v; } @@ -456,41 +507,48 @@ class Node : public uast::Node { return sz; } - const char* KeyAt(size_t i) { + std::string KeyAt(size_t i) { if (obj == Py_None) { return NULL; } if (!keys) keys = PyDict_Keys(obj); - PyObject* key = PyList_GetItem(keys, i); - return PyUnicode_AsUTF8(key); + PyObject* key = PyList_GetItem(keys, i); // borrows + auto k = PyUnicode_AsUTF8(key); + return k; } Node* ValueAt(size_t i) { if (obj == Py_None) { - return 0; + return NULL; } if (PyList_Check(obj)) { - PyObject* v = PyList_GetItem(obj, i); - return lookupOrCreate(v); + PyObject* v = PyList_GetItem(obj, i); // borrows + return lookupOrCreate(v); // new ref } if (!keys) keys = PyDict_Keys(obj); - PyObject* key = PyList_GetItem(keys, i); - PyObject* val = PyDict_GetItem(obj, key); - Py_DECREF(key); + PyObject* key = PyList_GetItem(keys, i); // borrows + PyObject* val = PyDict_GetItem(obj, key); // borrows - return lookupOrCreate(val); + return lookupOrCreate(val); // new ref } void SetValue(size_t i, Node* val) { - PyObject* v = Py_None; - if (val && val->obj) v = val->obj; - // TODO: increase ref - PyList_SetItem(obj, i, v); + PyObject* v = nullptr; + if (val && val->obj) { + v = val->obj; + } else { + v = Py_None; + } + Py_INCREF(v); + PyList_SetItem(obj, i, v); // steals } - void SetKeyValue(const char* k, Node* val) { - PyObject* v = Py_None; - if (val && val->obj) v = val->obj; - // TODO: increase ref - PyDict_SetItemString(obj, k, v); + void SetKeyValue(std::string k, Node* val) { + PyObject* v = nullptr; + if (val && val->obj) { + v = val->obj; + } else { + v = 
Py_None; + } + PyDict_SetItemString(obj, k.data(), v); // new ref } }; @@ -502,7 +560,6 @@ class Context; class Interface : public uast::NodeCreator { private: - // TODO: track objects std::map obj2node; static PyObject* newBool(bool v) { @@ -511,7 +568,12 @@ class Interface : public uast::NodeCreator { } Py_RETURN_FALSE; } + + // lookupOrCreate either creates a new object or returns existing one. + // In the second case it creates a new reference. Node* lookupOrCreate(PyObject* obj) { + if (!obj || obj == Py_None) return NULL; + Node* node = obj2node[obj]; if (node) return node; @@ -519,6 +581,9 @@ class Interface : public uast::NodeCreator { obj2node[obj] = node; return node; } + + // create makes a new object with a specified kind. + // Steals the reference. Node* create(NodeKind kind, PyObject* obj) { Node* node = new Node(this, kind, obj); obj2node[obj] = node; @@ -531,15 +596,27 @@ class Interface : public uast::NodeCreator { Interface(){ } ~Interface(){ - // TODO: dealloc for Nodes and DECREF for PyObjects + // Only needs to deallocate Nodes, since they own + // the same object as used in the map key. + for (auto it : obj2node) { + delete(it.second); + } } + + // toNode creates a new or returns an existing node associated with Python object. + // Creates a new reference. Node* toNode(PyObject* obj){ return lookupOrCreate(obj); } + + // toPy returns a Python object associated with a node. + // Returns a new reference. PyObject* toPy(Node* node) { if (node == NULL) Py_RETURN_NONE; - return node->obj; // TODO incref? 
+ Py_INCREF(node->obj); + return node->obj; } + Node* NewObject(size_t size) { PyObject* m = PyDict_New(); return create(NODE_OBJECT, m); @@ -548,8 +625,8 @@ class Interface : public uast::NodeCreator { PyObject* arr = PyList_New(size); return create(NODE_ARRAY, arr); } - Node* NewString(const char* v) { - PyObject* obj = PyUnicode_FromString(v); + Node* NewString(std::string v) { + PyObject* obj = PyUnicode_FromString(v.data()); return create(NODE_STRING, obj); } Node* NewInt(int64_t v) { @@ -570,9 +647,14 @@ class Interface : public uast::NodeCreator { } }; +// toPy returns a Python object associated with a node. +// Returns a new reference. PyObject* Node::toPy() { return ctx->toPy(this); } + +// lookupOrCreate either creates a new object or returns existing one. +// In the second case it creates a new reference. Node* Node::lookupOrCreate(PyObject* obj) { return ctx->lookupOrCreate(obj); } @@ -607,7 +689,7 @@ static PyObject *PyUastIter_next(PyObject *self) { Node* node = it->iter->node(); if (!node) Py_RETURN_NONE; - return node->toPy(); + return node->toPy(); // new ref } extern "C" @@ -664,13 +746,15 @@ class Context { uast::PtrInterface *impl; uast::Context *ctx; + // toPy returns a Python object associated with a node. + // Returns a new reference. PyObject* toPy(Node* node) { if (node == NULL) Py_RETURN_NONE; return iface->toPy(node); } + // toNode returns a node associated with a Python object. + // Creates a new reference. 
Node* toNode(PyObject* obj) { - if (!obj || obj == Py_None) return NULL; - return iface->lookupOrCreate(obj); } PyObject* newIter(uast::Iterator *it, bool freeCtx){ @@ -692,7 +776,7 @@ class Context { // create a class that makes and tracks UAST nodes iface = new Interface(); // create an implementation that will handle libuast calls - auto impl = new uast::PtrInterface(iface); + impl = new uast::PtrInterface(iface); // create a new UAST context based on this implementation ctx = impl->NewContext(); } @@ -702,37 +786,44 @@ class Context { delete(iface); } + // RootNode returns a root UAST node, if set. + // Returns a new reference. PyObject* RootNode(){ Node* root = ctx->RootNode(); - return toPy(root); + return toPy(root); // new ref } + // Iterate enumerates UAST nodes in a specified order. + // Creates a new reference. PyObject* Iterate(PyObject* node, TreeOrder order, bool freeCtx){ Node* unode = toNode(node); auto iter = ctx->Iterate(unode, order); return newIter(iter, freeCtx); } - PyObject* Filter(PyObject* node, char* query){ + // Filter queries UAST. + // Creates a new reference. + PyObject* Filter(PyObject* node, std::string query){ Node* unode = toNode(node); if (unode == NULL) { unode = ctx->RootNode(); } auto it = ctx->Filter(unode, query); - delete(query); return newIter(it, false); } + // Encode serializes UAST. + // Creates a new reference. PyObject* Encode(PyObject *node, UastFormat format) { uast::Buffer data = ctx->Encode(toNode(node), format); - return asPyBuffer(data); + return asPyBuffer(data); // TODO: this probably won't deallocate the buffer } PyObject* LoadFrom(NodeExt *src) { auto sctx = src->ctx->ctx; NodeHandle snode = src->handle; Node* node = uast::Load(sctx, snode, ctx); - return toPy(node); + return toPy(node); // new ref } }; @@ -761,13 +852,14 @@ typedef struct { static void PyUast_dealloc(PyObject *self) { delete(((PyUast *)self)->p); + // TODO: delete self? 
} static PyObject *PyUast_root(PyUast *self, PyObject *Py_UNUSED(ignored)) { return self->p->RootNode(); } -static PyObject *PyUast_filter(PyContextExt *self, PyObject *args) { +static PyObject *PyUast_filter(PyUast *self, PyObject *args) { PyObject *node = NULL; char *query = NULL; if (!PyArg_ParseTuple(args, "Os", &node, &query)) @@ -775,7 +867,7 @@ static PyObject *PyUast_filter(PyContextExt *self, PyObject *args) { return self->p->Filter(node, query); } -static PyObject *PyUast_encode(PyContextExt *self, PyObject *args) { +static PyObject *PyUast_encode(PyUast *self, PyObject *args) { PyObject *node = NULL; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum if (!PyArg_ParseTuple(args, "Oi", &node, &format)) diff --git a/setup.py b/setup.py index fb469a7..94481b2 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ FORMAT_ARGS = globals() # For debugging libuast-client interactions, set to True in production! -GET_LIBUAST = True +GET_LIBUAST = False if not GET_LIBUAST: print("WARNING: not retrieving libuast, using local version") @@ -27,8 +27,8 @@ os.environ["CC"] = "g++" # yes, g++ - otherwise distutils will use gcc -std=c++11 and explode if os.getenv("CXX") is None: os.environ["CXX"] = "g++" -libraries = [] -sources = ["bblfsh/pyuast.cc", "bblfsh/memtracker.cc"] +libraries = ['uast'] +sources = ["bblfsh/pyuast.cc"] log = logging.getLogger("setup.py") @@ -249,7 +249,7 @@ def main(): libuast_module = Extension( "bblfsh.pyuast", libraries=libraries, - library_dirs=["/usr/lib", "/usr/local/lib"], + library_dirs=["/usr/lib", "/usr/local/lib", "."], extra_compile_args=["-std=c++11"], include_dirs=[j("bblfsh", "libuast"), "/usr/local/include", "/usr/include"], sources=sources) From 8e39632706e86f2aa8e6f60370b82ca728a81e72 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Tue, 16 Oct 2018 16:04:50 +0300 Subject: [PATCH 03/48] fix string memory management Signed-off-by: Denys Smirnov --- bblfsh/pyuast.cc | 136 
+++++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 63 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 7b2e18f..16ba4a2 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -7,7 +7,6 @@ #include #include "libuast.hpp" -#include "memtracker.h" // Used to store references to the Pyobjects instanced in String() and // ItemAt() methods. Those can't be DECREF'ed to 0 because libuast uses them @@ -48,13 +47,13 @@ static PyMethodDef NodeExt_methods[] = { {"load", (PyCFunction) NodeExt_load, METH_NOARGS, "Load external node to Python" }, - {NULL} // Sentinel + {nullptr} // Sentinel }; extern "C" { static PyTypeObject NodeExtType = { - PyVarObject_HEAD_INIT(NULL, 0) + PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.NodeExt", // tp_name sizeof(NodeExt), // tp_basicsize 0, // tp_itemsize @@ -121,7 +120,7 @@ static PyObject *PyUastIterExt_next(PyObject *self) { // TODO: check errors if (!it->iter->next()) { PyErr_SetNone(PyExc_StopIteration); - return NULL; + return nullptr; } NodeHandle node = it->iter->node(); @@ -133,7 +132,7 @@ static PyObject *PyUastIterExt_next(PyObject *self) { extern "C" { static PyTypeObject PyUastIterExtType = { - PyVarObject_HEAD_INIT(NULL, 0) + PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.IteratorExt", // tp_name sizeof(PyUastIterExt), // tp_basicsize 0, // tp_itemsize @@ -188,7 +187,7 @@ class ContextExt { if (node == 0) Py_RETURN_NONE; NodeExt *pyObj = PyObject_New(NodeExt, &NodeExtType); - if (!pyObj) return NULL; + if (!pyObj) return nullptr; pyObj->ctx = this; pyObj->handle = node; @@ -214,11 +213,11 @@ class ContextExt { PyObject* newIter(uast::Iterator *it, bool freeCtx){ PyUastIterExt *pyIt = PyObject_New(PyUastIterExt, &PyUastIterExtType); if (!pyIt) - return NULL; + return nullptr; if (!PyObject_Init((PyObject *)pyIt, &PyUastIterExtType)) { Py_DECREF(pyIt); - return NULL; + return nullptr; } pyIt->iter = it; pyIt->ctx = this; @@ -291,7 +290,7 @@ static void PyUastIterExt_dealloc(PyObject *self) 
{ delete(it->ctx); } it->freeCtx = false; - it->ctx = NULL; + it->ctx = nullptr; } typedef struct { @@ -313,20 +312,20 @@ static PyObject *PyContextExt_root(PyContextExt *self, PyObject *Py_UNUSED(ignor // PyContextExt_filter filters UAST. // Returns a new reference. static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args) { - PyObject *node = NULL; - char *query = NULL; + PyObject *node = nullptr; + char *query = nullptr; if (!PyArg_ParseTuple(args, "Os", &node, &query)) - return NULL; + return nullptr; return self->p->Filter(node, query); } // PyContextExt_filter serializes UAST. // Returns a new reference. static PyObject *PyContextExt_encode(PyContextExt *self, PyObject *args) { - PyObject *node = NULL; + PyObject *node = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum if (!PyArg_ParseTuple(args, "Oi", &node, &format)) - return NULL; + return nullptr; return self->p->Encode(node, format); } @@ -340,13 +339,13 @@ static PyMethodDef PyContextExt_methods[] = { {"encode", (PyCFunction) PyContextExt_encode, METH_VARARGS, "Encodes a UAST into a buffer" }, - {NULL} // Sentinel + {nullptr} // Sentinel }; extern "C" { static PyTypeObject PyContextExtType = { - PyVarObject_HEAD_INIT(NULL, 0) + PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.ContextExt", // tp_name sizeof(PyContextExt), // tp_basicsize 0, // tp_itemsize @@ -400,12 +399,13 @@ class Node : public uast::Node { NodeKind kind; PyObject* keys; + std::string* str; // checkPyException checks a Python error status, and if it's set, throws an error. static void checkPyException() { PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); - if (value == NULL || value == Py_None) { + if (value == nullptr || value == Py_None) { return; } Py_DECREF(type); @@ -424,7 +424,7 @@ class Node : public uast::Node { // Borrows the reference. 
static NodeKind kindOf(PyObject* obj) { if (!obj || obj == Py_None) { - return NODE_NULL; + return NODE_nullptr; } else if (PyUnicode_Check(obj)) { return NODE_STRING; } else if (PyLong_Check(obj)) { @@ -445,14 +445,14 @@ class Node : public uast::Node { // Node creates a new node associated with a given Python object and sets the kind. // Steals the reference. - Node(Interface* c, NodeKind k, PyObject* v) : keys(nullptr) { + Node(Interface* c, NodeKind k, PyObject* v) : keys(nullptr), str(nullptr) { ctx = c; obj = v; kind = k; } // Node creates a new node associated with a given Python object and automatically determines the kind. // Creates a new reference. - Node(Interface* c, PyObject* v) : keys(nullptr) { + Node(Interface* c, PyObject* v) : keys(nullptr), str(nullptr) { ctx = c; obj = v; Py_INCREF(v); kind = kindOf(v); @@ -465,6 +465,9 @@ class Node : public uast::Node { if (obj) { Py_DECREF(obj); } + if (str) { + delete str; + } } PyObject* toPy(); @@ -472,9 +475,14 @@ class Node : public uast::Node { NodeKind Kind() { return kind; } - std::string AsString() { - const char* v = PyUnicode_AsUTF8(obj); - return v; + std::string* AsString() { + if (!str) { + const char* s = PyUnicode_AsUTF8(obj); + str = new std::string(s); + } + + std::string* s = new std::string(*str); + return s; } int64_t AsInt() { long long v = PyLong_AsLongLong(obj); @@ -507,18 +515,20 @@ class Node : public uast::Node { return sz; } - std::string KeyAt(size_t i) { + std::string* KeyAt(size_t i) { if (obj == Py_None) { - return NULL; + return nullptr; } if (!keys) keys = PyDict_Keys(obj); PyObject* key = PyList_GetItem(keys, i); // borrows - auto k = PyUnicode_AsUTF8(key); - return k; + const char * k = PyUnicode_AsUTF8(key); + + std::string* s = new std::string(k); + return s; } Node* ValueAt(size_t i) { if (obj == Py_None) { - return NULL; + return nullptr; } if (PyList_Check(obj)) { PyObject* v = PyList_GetItem(obj, i); // borrows @@ -572,7 +582,7 @@ class Interface : public 
uast::NodeCreator { // lookupOrCreate either creates a new object or returns existing one. // In the second case it creates a new reference. Node* lookupOrCreate(PyObject* obj) { - if (!obj || obj == Py_None) return NULL; + if (!obj || obj == Py_None) return nullptr; Node* node = obj2node[obj]; if (node) return node; @@ -612,7 +622,7 @@ class Interface : public uast::NodeCreator { // toPy returns a Python object associated with a node. // Returns a new reference. PyObject* toPy(Node* node) { - if (node == NULL) Py_RETURN_NONE; + if (node == nullptr) Py_RETURN_NONE; Py_INCREF(node->obj); return node->obj; } @@ -683,7 +693,7 @@ static PyObject *PyUastIter_next(PyObject *self) { // TODO: check errors if (!it->iter->next()) { PyErr_SetNone(PyExc_StopIteration); - return NULL; + return nullptr; } Node* node = it->iter->node(); @@ -695,7 +705,7 @@ static PyObject *PyUastIter_next(PyObject *self) { extern "C" { static PyTypeObject PyUastIterType = { - PyVarObject_HEAD_INIT(NULL, 0) + PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.Iterator", // tp_name sizeof(PyUastIter), // tp_basicsize 0, // tp_itemsize @@ -749,7 +759,7 @@ class Context { // toPy returns a Python object associated with a node. // Returns a new reference. PyObject* toPy(Node* node) { - if (node == NULL) Py_RETURN_NONE; + if (node == nullptr) Py_RETURN_NONE; return iface->toPy(node); } // toNode returns a node associated with a Python object. @@ -760,11 +770,11 @@ class Context { PyObject* newIter(uast::Iterator *it, bool freeCtx){ PyUastIter *pyIt = PyObject_New(PyUastIter, &PyUastIterType); if (!pyIt) - return NULL; + return nullptr; if (!PyObject_Init((PyObject *)pyIt, &PyUastIterType)) { Py_DECREF(pyIt); - return NULL; + return nullptr; } pyIt->iter = it; pyIt->ctx = this; @@ -805,7 +815,7 @@ class Context { // Creates a new reference. 
PyObject* Filter(PyObject* node, std::string query){ Node* unode = toNode(node); - if (unode == NULL) { + if (unode == nullptr) { unode = ctx->RootNode(); } @@ -842,7 +852,7 @@ static void PyUastIter_dealloc(PyObject *self) { delete(it->ctx); } it->freeCtx = false; - it->ctx = NULL; + it->ctx = nullptr; } typedef struct { @@ -860,18 +870,18 @@ static PyObject *PyUast_root(PyUast *self, PyObject *Py_UNUSED(ignored)) { } static PyObject *PyUast_filter(PyUast *self, PyObject *args) { - PyObject *node = NULL; - char *query = NULL; + PyObject *node = nullptr; + char *query = nullptr; if (!PyArg_ParseTuple(args, "Os", &node, &query)) - return NULL; + return nullptr; return self->p->Filter(node, query); } static PyObject *PyUast_encode(PyUast *self, PyObject *args) { - PyObject *node = NULL; + PyObject *node = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum if (!PyArg_ParseTuple(args, "Oi", &node, &format)) - return NULL; + return nullptr; return self->p->Encode(node, format); } @@ -885,13 +895,13 @@ static PyMethodDef PyUast_methods[] = { {"encode", (PyCFunction) PyUast_encode, METH_VARARGS, "Encodes a UAST into a buffer" }, - {NULL} // Sentinel + {nullptr} // Sentinel }; extern "C" { static PyTypeObject PyUastType = { - PyVarObject_HEAD_INIT(NULL, 0) + PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.Context", // tp_name sizeof(PyUast), // tp_basicsize 0, // tp_itemsize @@ -937,11 +947,11 @@ extern "C" // ========================================== static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { - PyObject *obj = NULL; + PyObject *obj = nullptr; uint8_t order; if (!PyArg_ParseTuple(args, "OB", &obj, &order)) - return NULL; + return nullptr; // the node can either be external or any other Python object if (PyObject_TypeCheck(obj, &NodeExtType)) { @@ -955,17 +965,17 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { } static PyObject *PyUastDecode(PyObject *self, PyObject *args) { - PyObject *obj = NULL; + PyObject 
*obj = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg if (!PyArg_ParseTuple(args, "Oi", &obj, &format)) - return NULL; + return nullptr; Py_buffer buf; int res = PyObject_GetBuffer(obj, &buf, PyBUF_C_CONTIGUOUS); if (res != 0) - return NULL; + return nullptr; uast::Buffer ubuf(buf.buf, (size_t)(buf.len)); @@ -975,7 +985,7 @@ static PyObject *PyUastDecode(PyObject *self, PyObject *args) { PyContextExt *pyU = PyObject_New(PyContextExt, &PyContextExtType); if (!pyU) { delete(ctx); - return NULL; + return nullptr; } pyU->p = new ContextExt(ctx); return (PyObject*)pyU; @@ -983,12 +993,12 @@ static PyObject *PyUastDecode(PyObject *self, PyObject *args) { static PyObject *PyUast_new(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) { - return NULL; + return nullptr; } PyUast *pyU = PyObject_New(PyUast, &PyUastType); if (!pyU) { - return NULL; + return nullptr; } pyU->p = new Context(); return (PyObject*)pyU; @@ -998,29 +1008,29 @@ static PyMethodDef extension_methods[] = { {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, {"decode", PyUastDecode, METH_VARARGS, "Decode UAST from a byte array"}, {"uast", PyUast_new, METH_VARARGS, "Creates a new UAST context"}, - {NULL, NULL, 0, NULL} + {nullptr, nullptr, 0, nullptr} }; static struct PyModuleDef module_def = { PyModuleDef_HEAD_INIT, "pyuast", - NULL, + nullptr, -1, extension_methods, - NULL, - NULL, - NULL, - NULL + nullptr, + nullptr, + nullptr, + nullptr }; PyMODINIT_FUNC PyInit_pyuast(void) { - if (PyType_Ready(&PyContextExtType) < 0) return NULL; - if (PyType_Ready(&NodeExtType) < 0) return NULL; - if (PyType_Ready(&PyUastIterExtType) < 0) return NULL; + if (PyType_Ready(&PyContextExtType) < 0) return nullptr; + if (PyType_Ready(&NodeExtType) < 0) return nullptr; + if (PyType_Ready(&PyUastIterExtType) < 0) return nullptr; - if (PyType_Ready(&PyUastType) < 0) return NULL; - if (PyType_Ready(&PyUastIterType) < 0) return NULL; + if (PyType_Ready(&PyUastType) < 0) 
return nullptr; + if (PyType_Ready(&PyUastIterType) < 0) return nullptr; return PyModule_Create(&module_def); } From d3b6bf91e33de00d447d3d996c57bd9d98e1df14 Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Tue, 16 Oct 2018 22:23:06 +0300 Subject: [PATCH 04/48] refactoring: NodeExtType->PyNodeExtType for consistency Signed-off-by: Alexander Bezzubov --- bblfsh/pyuast.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 16ba4a2..c7ba57b 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -52,7 +52,7 @@ static PyMethodDef NodeExt_methods[] = { extern "C" { - static PyTypeObject NodeExtType = { + static PyTypeObject PyNodeExtType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.NodeExt", // tp_name sizeof(NodeExt), // tp_basicsize @@ -186,7 +186,7 @@ class ContextExt { PyObject* toPy(NodeHandle node) { if (node == 0) Py_RETURN_NONE; - NodeExt *pyObj = PyObject_New(NodeExt, &NodeExtType); + NodeExt *pyObj = PyObject_New(NodeExt, &PyNodeExtType); if (!pyObj) return nullptr; pyObj->ctx = this; @@ -199,7 +199,7 @@ class ContextExt { NodeHandle toHandle(PyObject* obj) { if (!obj || obj == Py_None) return 0; - if (!PyObject_TypeCheck(obj, &NodeExtType)) { + if (!PyObject_TypeCheck(obj, &PyNodeExtType)) { const char* err = "unknown node type"; PyErr_SetString(PyExc_NotImplementedError, err); ctx->SetError(err); @@ -954,7 +954,7 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { return nullptr; // the node can either be external or any other Python object - if (PyObject_TypeCheck(obj, &NodeExtType)) { + if (PyObject_TypeCheck(obj, &PyNodeExtType)) { // external node -> external iterator auto node = (NodeExt*)obj; return node->ctx->Iterate(obj, (TreeOrder)order); @@ -1027,7 +1027,7 @@ PyMODINIT_FUNC PyInit_pyuast(void) { if (PyType_Ready(&PyContextExtType) < 0) return nullptr; - if (PyType_Ready(&NodeExtType) < 0) return nullptr; + if (PyType_Ready(&PyNodeExtType) < 0) return 
nullptr; if (PyType_Ready(&PyUastIterExtType) < 0) return nullptr; if (PyType_Ready(&PyUastType) < 0) return nullptr; From 6ce2b11720045aa8375b890af5504029a51af73c Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Tue, 16 Oct 2018 22:31:53 +0300 Subject: [PATCH 05/48] refactoring: NodeExt->PyNodeExt for consistency Signed-off-by: Alexander Bezzubov --- bblfsh/pyuast.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index c7ba57b..483b3f9 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -39,12 +39,12 @@ typedef struct { PyObject_HEAD ContextExt *ctx; NodeHandle handle; -} NodeExt; +} PyNodeExt; -static PyObject *NodeExt_load(NodeExt *self, PyObject *Py_UNUSED(ignored)); +static PyObject *PyNodeExt_load(PyNodeExt *self, PyObject *Py_UNUSED(ignored)); -static PyMethodDef NodeExt_methods[] = { - {"load", (PyCFunction) NodeExt_load, METH_NOARGS, +static PyMethodDef PyNodeExt_methods[] = { + {"load", (PyCFunction) PyNodeExt_load, METH_NOARGS, "Load external node to Python" }, {nullptr} // Sentinel @@ -55,7 +55,7 @@ extern "C" static PyTypeObject PyNodeExtType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.NodeExt", // tp_name - sizeof(NodeExt), // tp_basicsize + sizeof(PyNodeExt), // tp_basicsize 0, // tp_itemsize 0, // tp_dealloc 0, // tp_print @@ -80,7 +80,7 @@ extern "C" 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - NodeExt_methods, // tp_methods + PyNodeExt_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, // tp_base @@ -186,7 +186,7 @@ class ContextExt { PyObject* toPy(NodeHandle node) { if (node == 0) Py_RETURN_NONE; - NodeExt *pyObj = PyObject_New(NodeExt, &PyNodeExtType); + PyNodeExt *pyObj = PyObject_New(PyNodeExt, &PyNodeExtType); if (!pyObj) return nullptr; pyObj->ctx = this; @@ -206,7 +206,7 @@ class ContextExt { return 0; } - auto node = (NodeExt*)obj; + auto node = (PyNodeExt*)obj; return node->handle; } @@ 
-828,7 +828,7 @@ class Context { uast::Buffer data = ctx->Encode(toNode(node), format); return asPyBuffer(data); // TODO: this probably won't deallocate the buffer } - PyObject* LoadFrom(NodeExt *src) { + PyObject* LoadFrom(PyNodeExt *src) { auto sctx = src->ctx->ctx; NodeHandle snode = src->handle; @@ -837,7 +837,7 @@ class Context { } }; -static PyObject *NodeExt_load(NodeExt *self, PyObject *Py_UNUSED(ignored)) { +static PyObject *PyNodeExt_load(PyNodeExt *self, PyObject *Py_UNUSED(ignored)) { auto ctx = new Context(); PyObject* node = ctx->LoadFrom(self); delete(ctx); @@ -956,7 +956,7 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { // the node can either be external or any other Python object if (PyObject_TypeCheck(obj, &PyNodeExtType)) { // external node -> external iterator - auto node = (NodeExt*)obj; + auto node = (PyNodeExt*)obj; return node->ctx->Iterate(obj, (TreeOrder)order); } // Python object -> create a new context and attach it to an iterator From 7bac87f747157226f148fc9262b739a5137dd584 Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Tue, 16 Oct 2018 22:33:50 +0300 Subject: [PATCH 06/48] refactoring: PyUastType->PyContextType for consistency Signed-off-by: Alexander Bezzubov --- bblfsh/pyuast.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 483b3f9..b0a6130 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -900,7 +900,7 @@ static PyMethodDef PyUast_methods[] = { extern "C" { - static PyTypeObject PyUastType = { + static PyTypeObject PyContextType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.Context", // tp_name sizeof(PyUast), // tp_basicsize @@ -996,7 +996,7 @@ static PyObject *PyUast_new(PyObject *self, PyObject *args) { return nullptr; } - PyUast *pyU = PyObject_New(PyUast, &PyUastType); + PyUast *pyU = PyObject_New(PyUast, &PyContextType); if (!pyU) { return nullptr; } @@ -1030,7 +1030,7 @@ PyInit_pyuast(void) if (PyType_Ready(&PyNodeExtType) 
< 0) return nullptr; if (PyType_Ready(&PyUastIterExtType) < 0) return nullptr; - if (PyType_Ready(&PyUastType) < 0) return nullptr; + if (PyType_Ready(&PyContextType) < 0) return nullptr; if (PyType_Ready(&PyUastIterType) < 0) return nullptr; return PyModule_Create(&module_def); } From 8b84e4a8807f97fb8b26b6aac764c94a00778ea6 Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Tue, 16 Oct 2018 22:39:26 +0300 Subject: [PATCH 07/48] refactoring: PyUast->PyContext for consistency Signed-off-by: Alexander Bezzubov --- bblfsh/pyuast.cc | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index b0a6130..66036bb 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -858,18 +858,18 @@ static void PyUastIter_dealloc(PyObject *self) { typedef struct { PyObject_HEAD Context *p; -} PyUast; +} PyContext; -static void PyUast_dealloc(PyObject *self) { +static void PyContext_dealloc(PyObject *self) { delete(((PyUast *)self)->p); // TODO: delete self? 
} -static PyObject *PyUast_root(PyUast *self, PyObject *Py_UNUSED(ignored)) { +static PyObject *PyContext_root(PyContext *self, PyObject *Py_UNUSED(ignored)) { return self->p->RootNode(); } -static PyObject *PyUast_filter(PyUast *self, PyObject *args) { +static PyObject *PyContext_filter(PyContext *self, PyObject *args) { PyObject *node = nullptr; char *query = nullptr; if (!PyArg_ParseTuple(args, "Os", &node, &query)) @@ -877,7 +877,7 @@ static PyObject *PyUast_filter(PyUast *self, PyObject *args) { return self->p->Filter(node, query); } -static PyObject *PyUast_encode(PyUast *self, PyObject *args) { +static PyObject *PyContext_encode(PyContext *self, PyObject *args) { PyObject *node = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum if (!PyArg_ParseTuple(args, "Oi", &node, &format)) @@ -885,14 +885,14 @@ static PyObject *PyUast_encode(PyUast *self, PyObject *args) { return self->p->Encode(node, format); } -static PyMethodDef PyUast_methods[] = { - {"root", (PyCFunction) PyUast_root, METH_NOARGS, +static PyMethodDef PyContext_methods[] = { + {"root", (PyCFunction) PyContext_root, METH_NOARGS, "Return the root node attached to this query context" }, - {"filter", (PyCFunction) PyUast_filter, METH_VARARGS, + {"filter", (PyCFunction) PyContext_filter, METH_VARARGS, "Filter a provided UAST with XPath" }, - {"encode", (PyCFunction) PyUast_encode, METH_VARARGS, + {"encode", (PyCFunction) PyContext_encode, METH_VARARGS, "Encodes a UAST into a buffer" }, {nullptr} // Sentinel @@ -903,9 +903,9 @@ extern "C" static PyTypeObject PyContextType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.Context", // tp_name - sizeof(PyUast), // tp_basicsize + sizeof(PyContext), // tp_basicsize 0, // tp_itemsize - PyUast_dealloc, // tp_dealloc + PyContext_dealloc, // tp_dealloc 0, // tp_print 0, // tp_getattr 0, // tp_setattr @@ -928,7 +928,7 @@ extern "C" 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - PyUast_methods, // 
tp_methods + PyContext_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, // tp_base @@ -964,7 +964,7 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { return ctx->Iterate(obj, (TreeOrder)order, true); } -static PyObject *PyUastDecode(PyObject *self, PyObject *args) { +static PyObject *PyContextDecode(PyObject *self, PyObject *args) { PyObject *obj = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg @@ -991,12 +991,12 @@ static PyObject *PyUastDecode(PyObject *self, PyObject *args) { return (PyObject*)pyU; } -static PyObject *PyUast_new(PyObject *self, PyObject *args) { +static PyObject *PyContext_new(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) { return nullptr; } - PyUast *pyU = PyObject_New(PyUast, &PyContextType); + PyContext *pyU = PyObject_New(PyContext, &PyContextType); if (!pyU) { return nullptr; } @@ -1006,8 +1006,8 @@ static PyObject *PyUast_new(PyObject *self, PyObject *args) { static PyMethodDef extension_methods[] = { {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, - {"decode", PyUastDecode, METH_VARARGS, "Decode UAST from a byte array"}, - {"uast", PyUast_new, METH_VARARGS, "Creates a new UAST context"}, + {"decode", PyContextDecode, METH_VARARGS, "Decode UAST from a byte array"}, + {"uast", PyContext_new, METH_VARARGS, "Creates a new UAST context"}, {nullptr, nullptr, 0, nullptr} }; From 1fa1d0d9dedd4637f06cdebded9bed5e310d228a Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Wed, 17 Oct 2018 12:06:16 +0300 Subject: [PATCH 08/48] refactoring: fix comments + fmt after rebase Signed-off-by: Alexander Bezzubov --- bblfsh/pyuast.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 66036bb..dc143a7 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -55,7 +55,7 @@ extern "C" static PyTypeObject PyNodeExtType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.NodeExt", // tp_name - 
sizeof(PyNodeExt), // tp_basicsize + sizeof(PyNodeExt), // tp_basicsize 0, // tp_itemsize 0, // tp_dealloc 0, // tp_print @@ -80,7 +80,7 @@ extern "C" 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - PyNodeExt_methods, // tp_methods + PyNodeExt_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, // tp_base @@ -181,7 +181,7 @@ class ContextExt { private: uast::Context *ctx; - // toPy allocates a new NodeExt with a specified handle. + // toPy allocates a new PyNodeExt with a specified handle. // Returns a new reference. PyObject* toPy(NodeHandle node) { if (node == 0) Py_RETURN_NONE; @@ -194,7 +194,7 @@ class ContextExt { return (PyObject*)pyObj; } - // toHandle casts an object to NodeExt and returns its handle. + // toHandle casts an object to PyNodeExt and returns its handle. // Borrows the reference. NodeHandle toHandle(PyObject* obj) { if (!obj || obj == Py_None) return 0; @@ -903,9 +903,9 @@ extern "C" static PyTypeObject PyContextType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.Context", // tp_name - sizeof(PyContext), // tp_basicsize + sizeof(PyContext), // tp_basicsize 0, // tp_itemsize - PyContext_dealloc, // tp_dealloc + PyContext_dealloc, // tp_dealloc 0, // tp_print 0, // tp_getattr 0, // tp_setattr @@ -928,7 +928,7 @@ extern "C" 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - PyContext_methods, // tp_methods + PyContext_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, // tp_base From 9e8873355f7a6004b71c46ae78c25d4fc777883d Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 17 Oct 2018 12:59:51 +0300 Subject: [PATCH 09/48] apply review feedback --- bblfsh/pyuast.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index dc143a7..ea89328 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -964,7 +964,7 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { return ctx->Iterate(obj, 
(TreeOrder)order, true); } -static PyObject *PyContextDecode(PyObject *self, PyObject *args) { +static PyObject *PyContextExt_decode(PyObject *self, PyObject *args) { PyObject *obj = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg From 98b3ef863be4972834159053bc1ecc669a25aa20 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Wed, 17 Oct 2018 15:06:53 +0300 Subject: [PATCH 10/48] fix replace Signed-off-by: Denys Smirnov --- bblfsh/pyuast.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index ea89328..0a46c06 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -424,7 +424,7 @@ class Node : public uast::Node { // Borrows the reference. static NodeKind kindOf(PyObject* obj) { if (!obj || obj == Py_None) { - return NODE_nullptr; + return NODE_NULL; } else if (PyUnicode_Check(obj)) { return NODE_STRING; } else if (PyLong_Check(obj)) { From 96abf64cbfcdc2459d1c9a316c34ff3e1fc04342 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 18 Oct 2018 14:05:32 +0200 Subject: [PATCH 11/48] Build fixes, comment out v1 things, some other adjustements Signed-off-by: Juanjo Alvarez --- bblfsh/__init__.py | 2 +- bblfsh/aliases.py | 60 ++++++++++++++++++++------------ bblfsh/client.py | 82 ++++++++++++++++++++++---------------------- bblfsh/pyuast.cc | 12 +++---- bblfsh/sdkversion.py | 2 +- setup.py | 80 +++++++++++++++++++++++++++++------------- 6 files changed, 144 insertions(+), 94 deletions(-) diff --git a/bblfsh/__init__.py b/bblfsh/__init__.py index 0044f36..3e89a6e 100644 --- a/bblfsh/__init__.py +++ b/bblfsh/__init__.py @@ -1,5 +1,5 @@ from bblfsh.client import BblfshClient -from bblfsh.pyuast import filter, filter_bool, filter_number, filter_string, iterator +from bblfsh.pyuast import decode, iterator, uast from bblfsh.aliases import * class TreeOrder: diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 77aec68..2ebbeed 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -1,6 +1,4 @@ 
-__all__ = ["DESCRIPTOR", "Node", "ParseResponse", "NativeParseResponse", - "ParseRequest", "NativeParseRequest", "VersionRequest", - "ProtocolServiceStub"] +__all__ = ["DESCRIPTOR", "ParseResponse", "ParseRequest"] import importlib @@ -12,35 +10,53 @@ DESCRIPTOR = importlib.import_module( "bblfsh.gopkg.in.bblfsh.sdk.%s.uast.generated_pb2" % VERSION).DESCRIPTOR -Node = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.uast.generated_pb2" % VERSION).Node +# Node = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.uast.generated_pb2" % VERSION).Node ParseResponse = importlib.import_module( "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).ParseResponse -NativeParseResponse = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - ).NativeParseResponse +ParseError = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).ParseError + +Mode = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).Mode + +# NativeParseResponse = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION + # ).NativeParseResponse ParseRequest = importlib.import_module( "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).ParseRequest -NativeParseRequest = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - ).NativeParseRequest +# NativeParseRequest = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION + # ).NativeParseRequest + +# VersionRequest = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION + # ).VersionRequest -VersionRequest = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - ).VersionRequest +# VersionRequest = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION + # ).Driver 
-SupportedLanguagesRequest = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - ).SupportedLanguagesRequest +# SupportedLanguagesRequest = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION + # ).SupportedLanguagesRequest -SupportedLanguagesResponse = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - ).SupportedLanguagesResponse +# SupportedLanguagesResponse = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION + # ).SupportedLanguagesResponse + +# ProtocolServiceStub = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION + # ).ProtocolServiceStub + +DriverStub = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION + ).DriverStub -ProtocolServiceStub = importlib.import_module( +DriverServicer = importlib.import_module( "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION - ).ProtocolServiceStub + ).DriverServicer diff --git a/bblfsh/client.py b/bblfsh/client.py index 91b143e..e045f93 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -3,8 +3,8 @@ import grpc -from bblfsh.aliases import (ParseRequest, NativeParseRequest, VersionRequest, - ProtocolServiceStub, SupportedLanguagesRequest, SupportedLanguagesResponse) +from bblfsh.aliases import ParseRequest, DriverStub + from bblfsh.sdkversion import VERSION # The following two insertions fix the broken pb import paths @@ -31,7 +31,7 @@ def __init__(self, endpoint): :type endpoint: str """ self._channel = grpc.insecure_channel(endpoint) - self._stub = ProtocolServiceStub(self._channel) + self._stub = DriverStub(self._channel) @staticmethod def _check_utf8(text): @@ -74,44 +74,44 @@ def parse(self, filename, language=None, contents=None, timeout=None): language=self._scramble_language(language)) return self._stub.Parse(request, 
timeout=timeout) - def native_parse(self, filename, language=None, contents=None, timeout=None): - """ - Queries the Babelfish server and receives the native AST response for the specified - file. - - :param filename: The path to the file. Can be arbitrary if contents \ - is not None. - :param language: The programming language of the file. Refer to \ - https://doc.bblf.sh/languages.html for the list of \ - currently supported languages. None means autodetect. - :param contents: The contents of the file. IF None, it is read from \ - filename. - :param timeout: The request timeout in seconds. - :type filename: str - :type language: str - :type contents: str - :type timeout: float - :return: Native AST object. - """ - - contents = self._get_contents(contents, filename) - request = NativeParseRequest(filename=os.path.basename(filename), - content=contents, - language=self._scramble_language(language)) - return self._stub.NativeParse(request, timeout=timeout) - - def supported_languages(self): - sup_response = self._stub.SupportedLanguages(SupportedLanguagesRequest()) - return sup_response.languages - - def version(self): - """ - Queries the Babelfish server for version and runtime information. - - :return: A dictionary with the keys "version" for the semantic version and - "build" for the build timestamp. - """ - return self._stub.Version(VersionRequest()) + # def native_parse(self, filename, language=None, contents=None, timeout=None): + # """ + # Queries the Babelfish server and receives the native AST response for the specified + # file. + + # :param filename: The path to the file. Can be arbitrary if contents \ + # is not None. + # :param language: The programming language of the file. Refer to \ + # https://doc.bblf.sh/languages.html for the list of \ + # currently supported languages. None means autodetect. + # :param contents: The contents of the file. IF None, it is read from \ + # filename. + # :param timeout: The request timeout in seconds. 
+ # :type filename: str + # :type language: str + # :type contents: str + # :type timeout: float + # :return: Native AST object. + # """ + + # contents = self._get_contents(contents, filename) + # request = NativeParseRequest(filename=os.path.basename(filename), + # content=contents, + # language=self._scramble_language(language)) + # return self._stub.NativeParse(request, timeout=timeout) + + # def supported_languages(self): + # sup_response = self._stub.SupportedLanguages(SupportedLanguagesRequest()) + # return sup_response.languages + + # def version(self): + # """ + # Queries the Babelfish server for version and runtime information. + + # :return: A dictionary with the keys "version" for the semantic version and + # "build" for the build timestamp. + # """ + # return self._stub.Version(VersionRequest()) @staticmethod def _scramble_language(lang): diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 0a46c06..0a38721 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -499,7 +499,7 @@ class Node : public uast::Node { bool AsBool() { return obj == Py_True; } - + size_t Size() { size_t sz = 0; if (PyList_Check(obj)) { @@ -514,7 +514,7 @@ class Node : public uast::Node { assert(int64_t(sz) >= 0); return sz; } - + std::string* KeyAt(size_t i) { if (obj == Py_None) { return nullptr; @@ -540,7 +540,7 @@ class Node : public uast::Node { return lookupOrCreate(val); // new ref } - + void SetValue(size_t i, Node* val) { PyObject* v = nullptr; if (val && val->obj) { @@ -571,7 +571,7 @@ class Context; class Interface : public uast::NodeCreator { private: std::map obj2node; - + static PyObject* newBool(bool v) { if (v) { Py_RETURN_TRUE; @@ -861,7 +861,7 @@ typedef struct { } PyContext; static void PyContext_dealloc(PyObject *self) { - delete(((PyUast *)self)->p); + delete(((PyContext *)self)->p); // TODO: delete self? 
} @@ -1006,7 +1006,7 @@ static PyObject *PyContext_new(PyObject *self, PyObject *args) { static PyMethodDef extension_methods[] = { {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, - {"decode", PyContextDecode, METH_VARARGS, "Decode UAST from a byte array"}, + {"decode", PyContextExt_decode, METH_VARARGS, "Decode UAST from a byte array"}, {"uast", PyContext_new, METH_VARARGS, "Creates a new UAST context"}, {nullptr, nullptr, 0, nullptr} }; diff --git a/bblfsh/sdkversion.py b/bblfsh/sdkversion.py index bc55b42..9ce0bd7 100644 --- a/bblfsh/sdkversion.py +++ b/bblfsh/sdkversion.py @@ -1 +1 @@ -VERSION = "v1" +VERSION = "v2" diff --git a/setup.py b/setup.py index 94481b2..4574f87 100644 --- a/setup.py +++ b/setup.py @@ -13,24 +13,21 @@ from setuptools.command.build_ext import build_ext VERSION = "3.0.0" -LIBUAST_VERSION = "v2.0.0-rc1" -SDK_VERSION = "v2.2.3" +LIBUAST_VERSION = "v3.0.0-rc2" +SDK_VERSION = "v2.3.0" SDK_MAJOR = SDK_VERSION.split('.')[0] FORMAT_ARGS = globals() -# For debugging libuast-client interactions, set to True in production! -GET_LIBUAST = False -if not GET_LIBUAST: - print("WARNING: not retrieving libuast, using local version") - -if os.getenv("CC") is None: - os.environ["CC"] = "g++" # yes, g++ - otherwise distutils will use gcc -std=c++11 and explode -if os.getenv("CXX") is None: - os.environ["CXX"] = "g++" libraries = ['uast'] sources = ["bblfsh/pyuast.cc"] log = logging.getLogger("setup.py") +# For debugging libuast-client interactions, set to True in production! +GET_LIBUAST = True +if not GET_LIBUAST: + log.warning("WARNING: not retrieving libuast, using local version") + + class CustomBuildExt(build_ext): def run(self): @@ -47,6 +44,13 @@ def run(self): def j(*paths): return os.path.join(*paths) +def runorexit(cmd, errmsg = ""): + log.info(">>", cmd) + if os.system(cmd) != 0: + sep = ". 
" if errmsg else "" + log.error(errmsg + sep + "Failed command: '%s'" % cmd) + sys.exit(1) + def mkdir(path): path = path.format(**FORMAT_ARGS) @@ -63,7 +67,7 @@ def rimraf(path): def mv(src, dst): src = src.format(**FORMAT_ARGS) dst = dst.format(**FORMAT_ARGS) - log.info("mv %s %s", src, dst) + log.info(">> mv %s %s", src, dst) shutil.rmtree(dst, ignore_errors=True) os.rename(src, dst) @@ -71,7 +75,7 @@ def mv(src, dst): def cp(src, dst): src = src.format(**FORMAT_ARGS) dst = dst.format(**FORMAT_ARGS) - log.info("cp -p %s %s", src, dst) + log.info(">> cp %s %s", src, dst) shutil.rmtree(dst, ignore_errors=True) shutil.copy2(src, dst) @@ -79,14 +83,14 @@ def cp(src, dst): def cpr(src, dst): src = src.format(**FORMAT_ARGS) dst = dst.format(**FORMAT_ARGS) - log.info("cp -pr %s %s", src, dst) + log.info(">> cp -pr %s %s", src, dst) if os.path.isdir(dst): shutil.rmtree(dst) shutil.copytree(src, dst, symlinks=True) def untar_url(url, path="."): - log.info("tar xf " + url) + log.info(">> tar xf " + url) with urlopen(url) as response: response.tell = lambda: 0 # tarfile calls it only once in the beginning with tarfile.open(fileobj=response, mode=("r:" + url.rsplit(".", 1)[-1])) as tar: @@ -130,20 +134,44 @@ def get_libuast(): if not GET_LIBUAST: return - untar_url( - "https://github.com/bblfsh/libuast/archive/{LIBUAST_VERSION}/{LIBUAST_VERSION}.tar.gz" - .format(**FORMAT_ARGS)) - mv("libuast-" + LIBUAST_VERSION.replace("v", ""), "libuast") - cpr(j("libuast", "src"), j("bblfsh", "libuast")) - rimraf("libuast") + gopath = os.environ["GOPATH"] + if not gopath: + log.error("GOPATH must be set") + sys.exit(1) + + mkdir(j("bblfsh", "libuast")) + + # Retrieve libuast + runorexit("go get -u -v github.com/bblfsh/libuast") + + # Build it + py_dir = os.getcwd() + libuast_path = j(gopath, "src", "github.com", "bblfsh", "libuast") + log.info(">> cd ", libuast_path) + libuast_dir = j(gopath, "src", "github.com", "bblfsh", "libuast") + os.chdir(libuast_dir) + runorexit("make build") + + 
# Generate libuast.h + local_libuast = j("bblfsh", "libuast") + mkdir(local_libuast) + runorexit("go run gen_header.go -o libuast.h") + + # Copy the files + os.chdir(py_dir) + cp(j(libuast_path, "src", "libuast.hpp"), j(local_libuast, "libuast.hpp")) + cp(j(libuast_path, "libuast.h"), j(local_libuast, "libuast.h")) + + for i in ("helpers.c", "uast_go.h", "uast.h"): + cp(j(libuast_path, "src", i), j(local_libuast, i)) def proto_download(): untar_url("https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_VERSION) sdkdir = "sdk-" + SDK_VERSION[1:] destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_MAJOR}") - cp(j(sdkdir, "protocol", "generated.proto"), j(destdir, "protocol", "generated.proto")) - cp(j(sdkdir, "uast", "generated.proto"), j(destdir, "uast", "generated.proto")) + cp(j(sdkdir, "protocol", "driver.proto"), j(destdir, "protocol", "generated.proto")) + cp(j(sdkdir, "uast", "role", "generated.proto"), j(destdir, "uast", "generated.proto")) rimraf(sdkdir) @@ -174,20 +202,26 @@ def patch(file, *patchers): def protoc(proto_file, grpc=False): main_args = [protoc_module.__file__, "--python_out=bblfsh"] target_dir = j("bblfsh", *os.path.dirname(proto_file).split(".")) + if grpc: # using "." 
creates "gopkg.in" instead of "gopkg/in" directories main_args += ["--grpc_python_out=" + target_dir] + main_args += ["-Iproto", sysinclude, j("proto", proto_file)] log.info("%s -m grpc.tools.protoc " + " ".join(main_args[1:]), sys.executable) protoc_module.main(main_args) + if grpc: # we need to move the file back to grpc_out grpc_garbage_dir = None target = j(target_dir, "generated_pb2_grpc.py") + for root, dirnames, filenames in os.walk(target_dir): for filename in filenames: + if filename == "generated_pb2_grpc.py" and grpc_garbage_dir is not None: mv(j(root, filename), target) + if os.path.samefile(root, target_dir): grpc_garbage_dir = j(root, dirnames[0]) rimraf(grpc_garbage_dir) From 151e61c574b078fd9c3e6117f2435b16548cd9b0 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Mon, 22 Oct 2018 16:57:16 +0200 Subject: [PATCH 12/48] Recover grpc sdk v1 protocol for some grpc objects Signed-off-by: Juanjo Alvarez --- bblfsh/aliases.py | 54 ++++++++++++++------------------------ setup.py | 66 ++++++++++++++++++++++++++++++----------------- 2 files changed, 63 insertions(+), 57 deletions(-) diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 2ebbeed..832bdfe 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -1,5 +1,3 @@ -__all__ = ["DESCRIPTOR", "ParseResponse", "ParseRequest"] - import importlib from bblfsh.sdkversion import VERSION @@ -8,55 +6,43 @@ # we import by string DESCRIPTOR = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.uast.generated_pb2" % VERSION).DESCRIPTOR + "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2").DESCRIPTOR # Node = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.uast.generated_pb2" % VERSION).Node + # "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2").Node ParseResponse = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).ParseResponse + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").ParseResponse ParseError = importlib.import_module( - 
"bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).ParseError + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").ParseError Mode = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).Mode - -# NativeParseResponse = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - # ).NativeParseResponse + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").Mode ParseRequest = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION).ParseRequest - -# NativeParseRequest = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - # ).NativeParseRequest - -# VersionRequest = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - # ).VersionRequest + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").ParseRequest -# VersionRequest = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - # ).Driver +VersionRequest = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2" + ).VersionRequest -# SupportedLanguagesRequest = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - # ).SupportedLanguagesRequest +SupportedLanguagesRequest = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2" + ).SupportedLanguagesRequest -# SupportedLanguagesResponse = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION - # ).SupportedLanguagesResponse +SupportedLanguagesResponse = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2" + ).SupportedLanguagesResponse -# ProtocolServiceStub = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION - # ).ProtocolServiceStub +ProtocolServiceStub = importlib.import_module( + 
"bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc" + ).ProtocolServiceStub DriverStub = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc" ).DriverStub DriverServicer = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc" ).DriverServicer diff --git a/setup.py b/setup.py index 4574f87..d45f62f 100644 --- a/setup.py +++ b/setup.py @@ -14,8 +14,10 @@ VERSION = "3.0.0" LIBUAST_VERSION = "v3.0.0-rc2" -SDK_VERSION = "v2.3.0" -SDK_MAJOR = SDK_VERSION.split('.')[0] +SDK_V1_VERSION = "v1.16.1" +SDK_V1_MAJOR = SDK_V1_VERSION.split('.')[0] +SDK_V2_VERSION = "v2.3.0" +SDK_V2_MAJOR = SDK_V2_VERSION.split('.')[0] FORMAT_ARGS = globals() libraries = ['uast'] @@ -102,15 +104,15 @@ def call(*cmd): subprocess.check_call(cmd) -def create_dirs(): - mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_MAJOR}", "protocol")) - mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_MAJOR}", "uast")) - mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "protocol")) - mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "uast")) +def create_dirs(sdk_major): + mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.%s" % sdk_major, "protocol")) + mkdir(j("proto", "gopkg.in", "bblfsh", "sdk.%s" % sdk_major, "uast")) + mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "protocol")) + mkdir(j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "uast")) mkdir(j("bblfsh", "github", "com", "gogo", "protobuf", "gogoproto")) -def create_inits(): +def create_inits(sdk_major): init_files = [ j("bblfsh", "github", "__init__.py"), j("bblfsh", "github", "com", "__init__.py"), @@ -121,9 +123,9 @@ def create_inits(): j("bblfsh", "gopkg", "in", "__init__.py"), j("bblfsh", "gopkg", "in", "bblfsh", "__init__.py"), j("bblfsh", "gopkg", "in", "bblfsh", "sdk", "__init__.py"), - 
j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "__init__.py"), - j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "uast", "__init__.py"), - j("bblfsh", "gopkg", "in", "bblfsh", "sdk", SDK_MAJOR, "protocol", "__init__.py"), + j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "__init__.py"), + j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "uast", "__init__.py"), + j("bblfsh", "gopkg", "in", "bblfsh", "sdk", sdk_major, "protocol", "__init__.py"), ] for f in init_files: @@ -165,16 +167,23 @@ def get_libuast(): for i in ("helpers.c", "uast_go.h", "uast.h"): cp(j(libuast_path, "src", i), j(local_libuast, i)) +def proto_download_v1(): + url ="https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_V1_VERSION + untar_url(url) + sdkdir = "sdk-" + SDK_V1_VERSION[1:] + destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_V1_MAJOR}") + cp(j(sdkdir, "protocol", "generated.proto"), j(destdir, "protocol", "generated.proto")) + cp(j(sdkdir, "uast", "generated.proto"), j(destdir, "uast", "generated.proto")) + rimraf(sdkdir) -def proto_download(): - untar_url("https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_VERSION) - sdkdir = "sdk-" + SDK_VERSION[1:] - destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_MAJOR}") +def proto_download_v2(): + untar_url("https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_V2_VERSION) + sdkdir = "sdk-" + SDK_V2_VERSION[1:] + destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_V2_MAJOR}") cp(j(sdkdir, "protocol", "driver.proto"), j(destdir, "protocol", "generated.proto")) cp(j(sdkdir, "uast", "role", "generated.proto"), j(destdir, "uast", "generated.proto")) rimraf(sdkdir) - def proto_compile(): sysinclude = "-I" + pkg_resources.resource_filename("grpc_tools", "_proto") from grpc.tools import protoc as protoc_module @@ -224,7 +233,9 @@ def protoc(proto_file, grpc=False): if os.path.samefile(root, target_dir): grpc_garbage_dir = j(root, dirnames[0]) - rimraf(grpc_garbage_dir) + + if grpc_garbage_dir: + 
rimraf(grpc_garbage_dir) # grpc ignores "in" and we need to patch the import path def grpc_replacer(match): @@ -246,18 +257,27 @@ def importlib_import_replacer(match): (from_import_re, from_import_replacer), (importlib_import_re, importlib_import_replacer)) - protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_MAJOR, "protocol", "generated.proto"), True) protoc(j("github.com", "gogo", "protobuf", "gogoproto", "gogo.proto")) - protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_MAJOR, "uast", "generated.proto")) + + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V1_MAJOR, "protocol", "generated.proto"), True) + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V1_MAJOR, "uast", "generated.proto")) + + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V2_MAJOR, "uast", "generated.proto")) + protoc(j("gopkg.in", "bblfsh", "sdk." + SDK_V2_MAJOR, "protocol", "generated.proto"), True) def do_get_deps(): get_libuast() - create_dirs() - create_inits() - proto_download() - proto_compile() + create_dirs(SDK_V1_MAJOR) + create_dirs(SDK_V2_MAJOR) + + create_inits(SDK_V1_MAJOR) + create_inits(SDK_V2_MAJOR) + + proto_download_v1() + proto_download_v2() + proto_compile() def clean(): rimraf("gopkg.in") From 7f583ea0de7be4652b3599b4c7521d9e056935ed Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Mon, 22 Oct 2018 19:19:51 +0200 Subject: [PATCH 13/48] Forward port the aliases refactor by Vadim Signed-off-by: Juanjo Alvarez --- bblfsh/aliases.py | 54 +++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 832bdfe..43c5c16 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -5,44 +5,24 @@ # "in" is a reserved keyword in Python thus can't be used as package name, so # we import by string -DESCRIPTOR = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2").DESCRIPTOR +uast_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2") +protocol_v2_module = 
importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2") +protocol_grpc_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc") +protocol_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2") +protocol_grpc_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc") # Node = importlib.import_module( # "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2").Node -ParseResponse = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").ParseResponse - -ParseError = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").ParseError - -Mode = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").Mode - -ParseRequest = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2").ParseRequest - -VersionRequest = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2" - ).VersionRequest - -SupportedLanguagesRequest = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2" - ).SupportedLanguagesRequest - -SupportedLanguagesResponse = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2" - ).SupportedLanguagesResponse - -ProtocolServiceStub = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc" - ).ProtocolServiceStub - -DriverStub = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc" - ).DriverStub - -DriverServicer = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc" - ).DriverServicer +DESCRIPTOR = uast_v2_module.DESCRIPTOR +ParseRequest = protocol_v2_module.ParseRequest +ParseResponse = protocol_v2_module.ParseResponse +ParseError = protocol_v2_module.ParseError +Mode = protocol_v2_module.Mode +DriverStub = protocol_grpc_v2_module.DriverStub 
+DriverServicer = protocol_grpc_v2_module.DriverServicer + +VersionRequest = protocol_v1_module.VersionRequest +SupportedLanguagesRequest = protocol_v1_module.SupportedLanguagesRequest +SupportedLanguagesResponse = protocol_v1_module.SupportedLanguagesResponse +ProtocolServiceStub = protocol_grpc_v1_module.ProtocolServiceStub From 6ba57fc0d37c190790052f59b004a1725ee5dfba Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 24 Oct 2018 15:29:51 +0200 Subject: [PATCH 14/48] Forward port travis changes Signed-off-by: Juanjo Alvarez --- .travis.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.travis.yml b/.travis.yml index cca98f7..f6ab19a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,14 @@ install: - pip3 install -r requirements.txt - python3 setup.py --getdeps --log - pip3 install . --upgrade + - if [[ -z "$TRAVIS_TAG" ]]; then exit 0; fi + - echo "[distutils]" > $HOME/.pypirc + - echo "index-servers = " >> $HOME/.pypirc + - echo " pypi" >> $HOME/.pypirc + - echo "[pypi]" >> $HOME/.pypirc + - echo "username=$PYPI_USER" >> $HOME/.pypirc + - echo "password=$PYPI_PASS" >> $home/.pypirc + - python setup.py sdist upload script: - python3 setup.py build_ext -i - python3 -m unittest discover . 
From a2ca47174d218b7cd335b4c012f6dc21d2ce53e9 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Tue, 30 Oct 2018 18:57:12 +0200 Subject: [PATCH 15/48] fix pip install Signed-off-by: Denys Smirnov --- setup.py | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/setup.py b/setup.py index d45f62f..1b1388a 100644 --- a/setup.py +++ b/setup.py @@ -14,9 +14,10 @@ VERSION = "3.0.0" LIBUAST_VERSION = "v3.0.0-rc2" +LIBUAST_ARCH = "linux-amd64" SDK_V1_VERSION = "v1.16.1" SDK_V1_MAJOR = SDK_V1_VERSION.split('.')[0] -SDK_V2_VERSION = "v2.3.0" +SDK_V2_VERSION = "v2.5.0" SDK_V2_MAJOR = SDK_V2_VERSION.split('.')[0] FORMAT_ARGS = globals() @@ -136,36 +137,20 @@ def get_libuast(): if not GET_LIBUAST: return - gopath = os.environ["GOPATH"] + gopath = os.environ.get("GOPATH") + if not gopath: + gopath = subprocess.check_output(['go', 'env', 'GOPATH']).decode("utf-8").strip() if not gopath: log.error("GOPATH must be set") sys.exit(1) - mkdir(j("bblfsh", "libuast")) - - # Retrieve libuast - runorexit("go get -u -v github.com/bblfsh/libuast") - - # Build it py_dir = os.getcwd() - libuast_path = j(gopath, "src", "github.com", "bblfsh", "libuast") - log.info(">> cd ", libuast_path) - libuast_dir = j(gopath, "src", "github.com", "bblfsh", "libuast") - os.chdir(libuast_dir) - runorexit("make build") - - # Generate libuast.h - local_libuast = j("bblfsh", "libuast") + local_libuast = j(py_dir, "bblfsh", "libuast") mkdir(local_libuast) - runorexit("go run gen_header.go -o libuast.h") - - # Copy the files - os.chdir(py_dir) - cp(j(libuast_path, "src", "libuast.hpp"), j(local_libuast, "libuast.hpp")) - cp(j(libuast_path, "libuast.h"), j(local_libuast, "libuast.h")) - for i in ("helpers.c", "uast_go.h", "uast.h"): - cp(j(libuast_path, "src", i), j(local_libuast, i)) + # Retrieve libuast + untar_url("https://github.com/bblfsh/libuast/releases/download/%s/libuast-%s.tar.gz" % (LIBUAST_VERSION, LIBUAST_ARCH)) + mv(LIBUAST_ARCH, local_libuast) def 
proto_download_v1(): url ="https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_V1_VERSION @@ -303,7 +288,7 @@ def main(): libuast_module = Extension( "bblfsh.pyuast", libraries=libraries, - library_dirs=["/usr/lib", "/usr/local/lib", "."], + library_dirs=["/usr/lib", "/usr/local/lib", j("bblfsh", "libuast")], extra_compile_args=["-std=c++11"], include_dirs=[j("bblfsh", "libuast"), "/usr/local/include", "/usr/include"], sources=sources) From e308038281b9ffb56bc253cea866b5c62ee351ec Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Tue, 30 Oct 2018 19:39:31 +0200 Subject: [PATCH 16/48] update the client to use both protocols Signed-off-by: Denys Smirnov --- bblfsh/aliases.py | 38 ++++++++++++---------- bblfsh/client.py | 75 ++++++++++++++++++-------------------------- bblfsh/sdkversion.py | 1 - 3 files changed, 51 insertions(+), 63 deletions(-) delete mode 100644 bblfsh/sdkversion.py diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 2ca12a0..43c5c16 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -4,21 +4,25 @@ # "in" is a reserved keyword in Python thus can't be used as package name, so # we import by string -uast_module = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.uast.generated_pb2" % VERSION) -protocol_module = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2" % VERSION) -protocol_grpc_module = importlib.import_module( - "bblfsh.gopkg.in.bblfsh.sdk.%s.protocol.generated_pb2_grpc" % VERSION) -DESCRIPTOR = uast_module.DESCRIPTOR -Node = uast_module.Node -Position = uast_module.Position -ParseResponse = protocol_module.ParseResponse -NativeParseResponse = protocol_module.NativeParseResponse -ParseRequest = protocol_module.ParseRequest -NativeParseRequest = protocol_module.NativeParseRequest -VersionRequest = protocol_module.VersionRequest -SupportedLanguagesRequest = protocol_module.SupportedLanguagesRequest -SupportedLanguagesResponse = protocol_module.SupportedLanguagesResponse 
-ProtocolServiceStub = protocol_grpc_module.ProtocolServiceStub \ No newline at end of file +uast_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2") +protocol_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2") +protocol_grpc_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc") +protocol_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2") +protocol_grpc_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc") + +# Node = importlib.import_module( + # "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2").Node + +DESCRIPTOR = uast_v2_module.DESCRIPTOR +ParseRequest = protocol_v2_module.ParseRequest +ParseResponse = protocol_v2_module.ParseResponse +ParseError = protocol_v2_module.ParseError +Mode = protocol_v2_module.Mode +DriverStub = protocol_grpc_v2_module.DriverStub +DriverServicer = protocol_grpc_v2_module.DriverServicer + +VersionRequest = protocol_v1_module.VersionRequest +SupportedLanguagesRequest = protocol_v1_module.SupportedLanguagesRequest +SupportedLanguagesResponse = protocol_v1_module.SupportedLanguagesResponse +ProtocolServiceStub = protocol_grpc_v1_module.ProtocolServiceStub diff --git a/bblfsh/client.py b/bblfsh/client.py index e045f93..cc85fab 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -3,13 +3,13 @@ import grpc -from bblfsh.aliases import ParseRequest, DriverStub +import bblfsh.pyuast -from bblfsh.sdkversion import VERSION +from bblfsh.aliases import ParseRequest, DriverStub, ProtocolServiceStub, VersionRequest, SupportedLanguagesRequest # The following two insertions fix the broken pb import paths -sys.path.insert(0, os.path.join(os.path.dirname(__file__), - "gopkg/in/bblfsh/sdk/%s/protocol" % VERSION)) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "gopkg/in/bblfsh/sdk/v1/protocol")) +sys.path.insert(0, 
os.path.join(os.path.dirname(__file__), "gopkg/in/bblfsh/sdk/v2/protocol")) sys.path.insert(0, os.path.dirname(__file__)) @@ -31,7 +31,8 @@ def __init__(self, endpoint): :type endpoint: str """ self._channel = grpc.insecure_channel(endpoint) - self._stub = DriverStub(self._channel) + self._stub_v1 = ProtocolServiceStub(self._channel) + self._stub_v2 = DriverStub(self._channel) @staticmethod def _check_utf8(text): @@ -48,7 +49,7 @@ def _get_contents(contents, filename): BblfshClient._check_utf8(contents) return contents - def parse(self, filename, language=None, contents=None, timeout=None): + def parse(self, filename, language=None, contents=None, mode=None, raw=False, timeout=None): """ Queries the Babelfish server and receives the UAST response for the specified file. @@ -60,6 +61,8 @@ def parse(self, filename, language=None, contents=None, timeout=None): currently supported languages. None means autodetect. :param contents: The contents of the file. IF None, it is read from \ filename. + :param mode: UAST transformation mode. + :param raw: Return raw binary UAST without decoding it. :param timeout: The request timeout in seconds. :type filename: str :type language: str @@ -71,47 +74,29 @@ def parse(self, filename, language=None, contents=None, timeout=None): contents = self._get_contents(contents, filename) request = ParseRequest(filename=os.path.basename(filename), content=contents, + mode=mode, language=self._scramble_language(language)) - return self._stub.Parse(request, timeout=timeout) - - # def native_parse(self, filename, language=None, contents=None, timeout=None): - # """ - # Queries the Babelfish server and receives the native AST response for the specified - # file. - - # :param filename: The path to the file. Can be arbitrary if contents \ - # is not None. - # :param language: The programming language of the file. Refer to \ - # https://doc.bblf.sh/languages.html for the list of \ - # currently supported languages. None means autodetect. 
- # :param contents: The contents of the file. IF None, it is read from \ - # filename. - # :param timeout: The request timeout in seconds. - # :type filename: str - # :type language: str - # :type contents: str - # :type timeout: float - # :return: Native AST object. - # """ - - # contents = self._get_contents(contents, filename) - # request = NativeParseRequest(filename=os.path.basename(filename), - # content=contents, - # language=self._scramble_language(language)) - # return self._stub.NativeParse(request, timeout=timeout) - - # def supported_languages(self): - # sup_response = self._stub.SupportedLanguages(SupportedLanguagesRequest()) - # return sup_response.languages - - # def version(self): - # """ - # Queries the Babelfish server for version and runtime information. - - # :return: A dictionary with the keys "version" for the semantic version and + response = self._stub_v2.Parse(request, timeout=timeout) + """ + TODO: return detected language + TODO: handle syntax errors + """ + if raw: + return response.uast + return pyuast.decode(response.uast, 0) + + def supported_languages(self): + sup_response = self._stub_v1.SupportedLanguages(SupportedLanguagesRequest()) + return sup_response.languages + + def version(self): + """ + Queries the Babelfish server for version and runtime information. + + :return: A dictionary with the keys "version" for the semantic version and # "build" for the build timestamp. 
- # """ - # return self._stub.Version(VersionRequest()) + """ + return self._stub_v1.Version(VersionRequest()) @staticmethod def _scramble_language(lang): diff --git a/bblfsh/sdkversion.py b/bblfsh/sdkversion.py deleted file mode 100644 index 9ce0bd7..0000000 --- a/bblfsh/sdkversion.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = "v2" From 0d675e1f4e6a8697cb08b658f7b35b03e66aafe7 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 31 Oct 2018 15:29:40 +0100 Subject: [PATCH 17/48] Remove unused and broken import Signed-off-by: Juanjo Alvarez --- bblfsh/aliases.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 43c5c16..1fe70ec 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -1,7 +1,5 @@ import importlib -from bblfsh.sdkversion import VERSION - # "in" is a reserved keyword in Python thus can't be used as package name, so # we import by string From 91b798bfcc2ebf2ba412ae202421d1cec57b5959 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 31 Oct 2018 17:12:24 +0100 Subject: [PATCH 18/48] Compile the ext module from an static libuast object Signed-off-by: Juanjo Alvarez --- setup.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 1b1388a..263ccc8 100644 --- a/setup.py +++ b/setup.py @@ -21,25 +21,20 @@ SDK_V2_MAJOR = SDK_V2_VERSION.split('.')[0] FORMAT_ARGS = globals() -libraries = ['uast'] sources = ["bblfsh/pyuast.cc"] log = logging.getLogger("setup.py") # For debugging libuast-client interactions, set to True in production! 
-GET_LIBUAST = True +# FIXME: change to true +GET_LIBUAST = False if not GET_LIBUAST: log.warning("WARNING: not retrieving libuast, using local version") - class CustomBuildExt(build_ext): def run(self): - global libraries global sources - if "--global-uast" in sys.argv: - libraries.append("uast") - get_libuast() build_ext.run(self) @@ -285,13 +280,24 @@ def main(): clean() sys.exit() + libraries = [] + static_libraries = ["uast"] + static_lib_dir = j("bblfsh", "libuast") + + if sys.platform == 'win32': + # FIXME: untested! + libraries.extend(static_libraries) + extra_objects = [] + else: # POSIX + extra_objects = ['{}/lib{}.a'.format(static_lib_dir, l) for l in static_libraries] + libuast_module = Extension( "bblfsh.pyuast", libraries=libraries, - library_dirs=["/usr/lib", "/usr/local/lib", j("bblfsh", "libuast")], extra_compile_args=["-std=c++11"], - include_dirs=[j("bblfsh", "libuast"), "/usr/local/include", - "/usr/include"], sources=sources) + extra_objects=extra_objects, + include_dirs=[j("bblfsh", "libuast")], + sources=sources) setup( cmdclass = { From acec2192fde9ff3a8167f3d2149b903a6ae338db Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Thu, 1 Nov 2018 21:22:48 +0200 Subject: [PATCH 19/48] enable building the client with static libuast Signed-off-by: Denys Smirnov --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 263ccc8..59605bf 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ from setuptools.command.build_ext import build_ext VERSION = "3.0.0" -LIBUAST_VERSION = "v3.0.0-rc2" +LIBUAST_VERSION = "v3.0.0-rc3" LIBUAST_ARCH = "linux-amd64" SDK_V1_VERSION = "v1.16.1" SDK_V1_MAJOR = SDK_V1_VERSION.split('.')[0] @@ -25,8 +25,7 @@ log = logging.getLogger("setup.py") # For debugging libuast-client interactions, set to True in production! 
-# FIXME: change to true -GET_LIBUAST = False +GET_LIBUAST = True if not GET_LIBUAST: log.warning("WARNING: not retrieving libuast, using local version") From 2fd570c4e37d8350ac40ca277e725cd48c965669 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Thu, 1 Nov 2018 21:23:48 +0200 Subject: [PATCH 20/48] do not free the query string in filter, it seems to be borrowed Signed-off-by: Denys Smirnov --- bblfsh/pyuast.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 0a38721..dd981e2 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -263,7 +263,7 @@ class ContextExt { } uast::Iterator *it = ctx->Filter(unode, query); - delete(query); + return newIter(it, false); } @@ -316,6 +316,7 @@ static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args) { char *query = nullptr; if (!PyArg_ParseTuple(args, "Os", &node, &query)) return nullptr; + // TODO: freeing the query leads to a segfault; need to clarify why return self->p->Filter(node, query); } From d05770c21b1019a4927c642681eaf0efb2e1da78 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Thu, 1 Nov 2018 21:24:39 +0200 Subject: [PATCH 21/48] improve the native Python wrappers and update the readme Signed-off-by: Denys Smirnov --- README.md | 18 ++++++++++-------- bblfsh/client.py | 5 +++-- bblfsh/pyuast.cc | 38 ++++++++++++++++++++++++++------------ 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 9b83307..c3d950b 100644 --- a/README.md +++ b/README.md @@ -48,16 +48,18 @@ Please, read the [getting started](https://doc.bblf.sh/using-babelfish/getting-s import bblfsh client = bblfsh.BblfshClient("0.0.0.0:9432") -uast = client.parse("/path/to/file.py").uast -print(uast) +uast = client.parse("/path/to/file.py") +print(uast.load()) + # "filter' allows you to use XPath queries to filter on result nodes: -print(bblfsh.filter(uast, "//Import[@roleImport and @roleDeclaration]//alias")) +it = 
uast.filter("//Import[@role='Import' and @role='Declaration']//alias") +for node in it: + print(node.load()) -# filter\_[bool|string|number] must be used when using XPath functions returning -# these types: -print(bblfsh.filter_bool(uast, "boolean(//*[@strtOffset or @endOffset])")) -print(bblfsh.filter_string(uast, "name(//*[1])")) -print(bblfsh.filter_number(uast, "count(//*)")) +# filter must be used when using XPath functions returning these types: +print(uast.filter("boolean(//*[@strtOffset or @endOffset])")) +print(uast.filter("name(//*[1])")) +print(uast.filter("count(//*)")) # You can also iterate on several tree iteration orders: it = bblfsh.iterator(uast, bblfsh.TreeOrder.PRE_ORDER) diff --git a/bblfsh/client.py b/bblfsh/client.py index cc85fab..5d4be34 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -3,7 +3,7 @@ import grpc -import bblfsh.pyuast +from bblfsh.pyuast import decode as uast_decode from bblfsh.aliases import ParseRequest, DriverStub, ProtocolServiceStub, VersionRequest, SupportedLanguagesRequest @@ -83,7 +83,8 @@ def parse(self, filename, language=None, contents=None, mode=None, raw=False, ti """ if raw: return response.uast - return pyuast.decode(response.uast, 0) + ctx = uast_decode(response.uast, format=0) + return ctx def supported_languages(self): sup_response = self._stub_v1.SupportedLanguages(SupportedLanguagesRequest()) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index dd981e2..744060a 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -309,12 +309,20 @@ static PyObject *PyContextExt_root(PyContextExt *self, PyObject *Py_UNUSED(ignor return self->p->RootNode(); } +// PyContextExt_load returns a root node converted to Python object. +// Returns a new reference. +static PyObject *PyContextExt_load(PyContextExt *self, PyObject *Py_UNUSED(ignored)) { + PyObject* root = PyContextExt_root(self, nullptr); + return PyNodeExt_load((PyNodeExt*)root, nullptr); +} + // PyContextExt_filter filters UAST. // Returns a new reference. 
-static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args) { - PyObject *node = nullptr; +static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args, PyObject *kwargs) { + char* kwds[] = {"query", "node", NULL}; char *query = nullptr; - if (!PyArg_ParseTuple(args, "Os", &node, &query)) + PyObject *node = nullptr; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) return nullptr; // TODO: freeing the query leads to a segfault; need to clarify why return self->p->Filter(node, query); @@ -334,7 +342,10 @@ static PyMethodDef PyContextExt_methods[] = { {"root", (PyCFunction) PyContextExt_root, METH_NOARGS, "Return the root node attached to this query context" }, - {"filter", (PyCFunction) PyContextExt_filter, METH_VARARGS, + {"load", (PyCFunction) PyContextExt_load, METH_NOARGS, + "Load external node to Python" + }, + {"filter", (PyCFunction) PyContextExt_filter, METH_VARARGS | METH_KEYWORDS, "Filter a provided UAST with XPath" }, {"encode", (PyCFunction) PyContextExt_encode, METH_VARARGS, @@ -870,10 +881,11 @@ static PyObject *PyContext_root(PyContext *self, PyObject *Py_UNUSED(ignored)) { return self->p->RootNode(); } -static PyObject *PyContext_filter(PyContext *self, PyObject *args) { - PyObject *node = nullptr; +static PyObject *PyContext_filter(PyContext *self, PyObject *args, PyObject *kwargs) { + char* kwds[] = {"query", "node", NULL}; char *query = nullptr; - if (!PyArg_ParseTuple(args, "Os", &node, &query)) + PyObject *node = nullptr; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) return nullptr; return self->p->Filter(node, query); } @@ -890,7 +902,7 @@ static PyMethodDef PyContext_methods[] = { {"root", (PyCFunction) PyContext_root, METH_NOARGS, "Return the root node attached to this query context" }, - {"filter", (PyCFunction) PyContext_filter, METH_VARARGS, + {"filter", (PyCFunction) PyContext_filter, METH_VARARGS | METH_KEYWORDS, "Filter a provided UAST with XPath" }, 
{"encode", (PyCFunction) PyContext_encode, METH_VARARGS, @@ -965,11 +977,12 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { return ctx->Iterate(obj, (TreeOrder)order, true); } -static PyObject *PyContextExt_decode(PyObject *self, PyObject *args) { +static PyObject *PyContextExt_decode(PyObject *self, PyObject *args, PyObject *kwargs) { + char* kwds[] = {"data", "format", NULL}; PyObject *obj = nullptr; - UastFormat format = UAST_BINARY; // TODO: make it a kwarg + UastFormat format = UAST_BINARY; // TODO: make it an enum - if (!PyArg_ParseTuple(args, "Oi", &obj, &format)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i", kwds, &obj, &format)) return nullptr; Py_buffer buf; @@ -993,6 +1006,7 @@ static PyObject *PyContextExt_decode(PyObject *self, PyObject *args) { } static PyObject *PyContext_new(PyObject *self, PyObject *args) { + // TODO: optionally accept root object if (!PyArg_ParseTuple(args, "")) { return nullptr; } @@ -1007,7 +1021,7 @@ static PyObject *PyContext_new(PyObject *self, PyObject *args) { static PyMethodDef extension_methods[] = { {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, - {"decode", PyContextExt_decode, METH_VARARGS, "Decode UAST from a byte array"}, + {"decode", (PyCFunction)PyContextExt_decode, METH_VARARGS | METH_KEYWORDS, "Decode UAST from a byte array"}, {"uast", PyContext_new, METH_VARARGS, "Creates a new UAST context"}, {nullptr, nullptr, 0, nullptr} }; From 272acc91bd21fbfd111819a01fda6b530d69e158 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Fri, 2 Nov 2018 23:29:52 +0200 Subject: [PATCH 22/48] fix error handling in native extension Signed-off-by: Denys Smirnov --- bblfsh/pyuast.cc | 72 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 744060a..a36508a 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -29,6 +29,16 @@ static bool checkError(const Uast* ctx) { } */ 
+bool isContext(PyObject* obj); + +bool assertNotContext(PyObject* obj) { + if (isContext(obj)) { + PyErr_SetString(PyExc_RuntimeError, "cannot use uast context as a node"); + return false; + } + return true; +} + // ========================================== // External UAST Node (managed by libuast) // ========================================== @@ -249,6 +259,9 @@ class ContextExt { // Iterate iterates over an external UAST tree. // Borrows the reference. PyObject* Iterate(PyObject* node, TreeOrder order){ + if (!assertNotContext(node)) { + return nullptr; + } NodeHandle h = toHandle(node); auto iter = ctx->Iterate(h, order); return newIter(iter, false); @@ -257,6 +270,9 @@ class ContextExt { // Filter queries an external UAST. // Borrows the reference. PyObject* Filter(PyObject* node, char* query){ + if (!assertNotContext(node)) { + return nullptr; + } NodeHandle unode = toHandle(node); if (unode == 0) { unode = ctx->RootNode(); @@ -270,6 +286,9 @@ class ContextExt { // Encode serializes the external UAST. // Borrows the reference. PyObject* Encode(PyObject *node, UastFormat format) { + if (!assertNotContext(node)) { + return nullptr; + } uast::Buffer data = ctx->Encode(toHandle(node), format); return asPyBuffer(data); } @@ -324,8 +343,15 @@ static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args, PyObjec PyObject *node = nullptr; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) return nullptr; + + PyObject* it = nullptr; + try { + it = self->p->Filter(node, query); + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } // TODO: freeing the query leads to a segfault; need to clarify why - return self->p->Filter(node, query); + return it; } // PyContextExt_filter serializes UAST. 
@@ -420,8 +446,10 @@ class Node : public uast::Node { if (value == nullptr || value == Py_None) { return; } - Py_DECREF(type); - Py_DECREF(traceback); + if (type) + Py_DECREF(type); + if (traceback) + Py_DECREF(traceback); PyObject* str = PyObject_Str(value); Py_DECREF(value); @@ -513,15 +541,18 @@ class Node : public uast::Node { } size_t Size() { + if (obj == Py_None) { + return 0; + } size_t sz = 0; if (PyList_Check(obj)) { sz = (size_t)(PyList_Size(obj)); } else { sz = (size_t)(PyObject_Size(obj)); - if (int64_t(sz) == -1) { - checkPyException(); - return 0; // error - } + } + if (int64_t(sz) == -1) { + checkPyException(); + return 0; // error } assert(int64_t(sz) >= 0); return sz; @@ -532,6 +563,9 @@ class Node : public uast::Node { return nullptr; } if (!keys) keys = PyDict_Keys(obj); + if (!keys) { + return nullptr; + } PyObject* key = PyList_GetItem(keys, i); // borrows const char * k = PyUnicode_AsUTF8(key); @@ -818,6 +852,9 @@ class Context { // Iterate enumerates UAST nodes in a specified order. // Creates a new reference. PyObject* Iterate(PyObject* node, TreeOrder order, bool freeCtx){ + if (!assertNotContext(node)) { + return nullptr; + } Node* unode = toNode(node); auto iter = ctx->Iterate(unode, order); return newIter(iter, freeCtx); @@ -826,6 +863,9 @@ class Context { // Filter queries UAST. // Creates a new reference. PyObject* Filter(PyObject* node, std::string query){ + if (!assertNotContext(node)) { + return nullptr; + } Node* unode = toNode(node); if (unode == nullptr) { unode = ctx->RootNode(); @@ -837,6 +877,9 @@ class Context { // Encode serializes UAST. // Creates a new reference. 
PyObject* Encode(PyObject *node, UastFormat format) { + if (!assertNotContext(node)) { + return nullptr; + } uast::Buffer data = ctx->Encode(toNode(node), format); return asPyBuffer(data); // TODO: this probably won't deallocate the buffer } @@ -887,7 +930,15 @@ static PyObject *PyContext_filter(PyContext *self, PyObject *args, PyObject *kwa PyObject *node = nullptr; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) return nullptr; - return self->p->Filter(node, query); + + PyObject* it = nullptr; + try { + it = self->p->Filter(node, query); + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + // TODO: freeing the query leads to a segfault; need to clarify why + return it; } static PyObject *PyContext_encode(PyContext *self, PyObject *args) { @@ -1019,6 +1070,11 @@ static PyObject *PyContext_new(PyObject *self, PyObject *args) { return (PyObject*)pyU; } +bool isContext(PyObject* obj) { + if (!obj || obj == Py_None) return false; + return PyObject_TypeCheck(obj, &PyContextExtType) || PyObject_TypeCheck(obj, &PyContextType); +} + static PyMethodDef extension_methods[] = { {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, {"decode", (PyCFunction)PyContextExt_decode, METH_VARARGS | METH_KEYWORDS, "Decode UAST from a byte array"}, From 270445bfb896c0b90ec14973efd623115fe6b219 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 8 Nov 2018 10:21:35 +0100 Subject: [PATCH 23/48] Explicit cast to char* to avoid nasty warning with latest G++ Signed-off-by: Juanjo Alvarez --- bblfsh/pyuast.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index a36508a..4f3bda5 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -338,7 +338,7 @@ static PyObject *PyContextExt_load(PyContextExt *self, PyObject *Py_UNUSED(ignor // PyContextExt_filter filters UAST. // Returns a new reference. 
static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args, PyObject *kwargs) { - char* kwds[] = {"query", "node", NULL}; + char* kwds[] = {(char*)"query", (char*)"node", NULL}; char *query = nullptr; PyObject *node = nullptr; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) @@ -925,7 +925,7 @@ static PyObject *PyContext_root(PyContext *self, PyObject *Py_UNUSED(ignored)) { } static PyObject *PyContext_filter(PyContext *self, PyObject *args, PyObject *kwargs) { - char* kwds[] = {"query", "node", NULL}; + char* kwds[] = {(char*)"query", (char*)"node", NULL}; char *query = nullptr; PyObject *node = nullptr; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) @@ -1029,7 +1029,7 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { } static PyObject *PyContextExt_decode(PyObject *self, PyObject *args, PyObject *kwargs) { - char* kwds[] = {"data", "format", NULL}; + char* kwds[] = {(char*)"data", (char*)"format", NULL}; PyObject *obj = nullptr; UastFormat format = UAST_BINARY; // TODO: make it an enum From 1f977e482517ff2c5b586323d20c1a212c9ea39a Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 8 Nov 2018 10:44:16 +0100 Subject: [PATCH 24/48] PEP8 Signed-off-by: Juanjo Alvarez --- setup.py | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/setup.py b/setup.py index 59605bf..ebf7a91 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,8 @@ def run(self): def j(*paths): return os.path.join(*paths) -def runorexit(cmd, errmsg = ""): + +def runorexit(cmd, errmsg=""): log.info(">>", cmd) if os.system(cmd) != 0: sep = ". 
" if errmsg else "" @@ -89,7 +90,8 @@ def cpr(src, dst): def untar_url(url, path="."): log.info(">> tar xf " + url) with urlopen(url) as response: - response.tell = lambda: 0 # tarfile calls it only once in the beginning + # tarfile calls it only once in the beginning + response.tell = lambda: 0 with tarfile.open(fileobj=response, mode=("r:" + url.rsplit(".", 1)[-1])) as tar: tar.extractall(path=path) @@ -133,7 +135,8 @@ def get_libuast(): gopath = os.environ.get("GOPATH") if not gopath: - gopath = subprocess.check_output(['go', 'env', 'GOPATH']).decode("utf-8").strip() + gopath = subprocess.check_output( + ['go', 'env', 'GOPATH']).decode("utf-8").strip() if not gopath: log.error("GOPATH must be set") sys.exit(1) @@ -146,23 +149,31 @@ def get_libuast(): untar_url("https://github.com/bblfsh/libuast/releases/download/%s/libuast-%s.tar.gz" % (LIBUAST_VERSION, LIBUAST_ARCH)) mv(LIBUAST_ARCH, local_libuast) + def proto_download_v1(): - url ="https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_V1_VERSION + url = "https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_V1_VERSION untar_url(url) sdkdir = "sdk-" + SDK_V1_VERSION[1:] destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_V1_MAJOR}") - cp(j(sdkdir, "protocol", "generated.proto"), j(destdir, "protocol", "generated.proto")) - cp(j(sdkdir, "uast", "generated.proto"), j(destdir, "uast", "generated.proto")) + cp(j(sdkdir, "protocol", "generated.proto"), + j(destdir, "protocol", "generated.proto")) + cp(j(sdkdir, "uast", "generated.proto"), + j(destdir, "uast", "generated.proto")) rimraf(sdkdir) + def proto_download_v2(): - untar_url("https://github.com/bblfsh/sdk/archive/%s.tar.gz" % SDK_V2_VERSION) + untar_url("https://github.com/bblfsh/sdk/archive/%s.tar.gz" + % SDK_V2_VERSION) sdkdir = "sdk-" + SDK_V2_VERSION[1:] destdir = j("proto", "gopkg.in", "bblfsh", "sdk.{SDK_V2_MAJOR}") - cp(j(sdkdir, "protocol", "driver.proto"), j(destdir, "protocol", "generated.proto")) - cp(j(sdkdir, "uast", "role", 
"generated.proto"), j(destdir, "uast", "generated.proto")) + cp(j(sdkdir, "protocol", "driver.proto"), + j(destdir, "protocol", "generated.proto")) + cp(j(sdkdir, "uast", "role", "generated.proto"), + j(destdir, "uast", "generated.proto")) rimraf(sdkdir) + def proto_compile(): sysinclude = "-I" + pkg_resources.resource_filename("grpc_tools", "_proto") from grpc.tools import protoc as protoc_module @@ -196,7 +207,8 @@ def protoc(proto_file, grpc=False): main_args += ["--grpc_python_out=" + target_dir] main_args += ["-Iproto", sysinclude, j("proto", proto_file)] - log.info("%s -m grpc.tools.protoc " + " ".join(main_args[1:]), sys.executable) + log.info("%s -m grpc.tools.protoc " + + " ".join(main_args[1:]), sys.executable) protoc_module.main(main_args) if grpc: @@ -207,7 +219,8 @@ def protoc(proto_file, grpc=False): for root, dirnames, filenames in os.walk(target_dir): for filename in filenames: - if filename == "generated_pb2_grpc.py" and grpc_garbage_dir is not None: + if filename == "generated_pb2_grpc.py" and\ + grpc_garbage_dir is not None: mv(j(root, filename), target) if os.path.samefile(root, target_dir): @@ -258,6 +271,7 @@ def do_get_deps(): proto_download_v2() proto_compile() + def clean(): rimraf("gopkg.in") rimraf(j("bblfsh", "github")) @@ -267,7 +281,8 @@ def clean(): def main(): - # The --global-uast flag allows to install the python driver using the installed uast library + # The --global-uast flag allows to install the python driver + # using the installed uast library if "--log" in sys.argv: logging.basicConfig(level=logging.INFO) @@ -287,8 +302,9 @@ def main(): # FIXME: untested! 
libraries.extend(static_libraries) extra_objects = [] - else: # POSIX - extra_objects = ['{}/lib{}.a'.format(static_lib_dir, l) for l in static_libraries] + else: # POSIX + extra_objects = ['{}/lib{}.a'.format( + static_lib_dir, l) for l in static_libraries] libuast_module = Extension( "bblfsh.pyuast", @@ -299,7 +315,7 @@ def main(): sources=sources) setup( - cmdclass = { + cmdclass={ "build_ext": CustomBuildExt, }, name="bblfsh", @@ -313,7 +329,8 @@ def main(): packages=find_packages(), exclude=["bblfsh/test.py"], keywords=["babelfish", "uast"], - install_requires=["grpcio==1.13.0", "grpcio-tools==1.13.0", "docker", "protobuf>=3.4.0"], + install_requires=["grpcio==1.13.0", "grpcio-tools==1.13.0", + "docker", "protobuf>=3.4.0"], package_data={"": ["LICENSE", "README.md"]}, ext_modules=[libuast_module], classifiers=[ From 20890e01a46b821907d517c8eed638354b57b7e3 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 8 Nov 2018 16:26:27 +0100 Subject: [PATCH 25/48] Renamed PyContext to PythonContext to avoid symbol conflict in 3.7+ Signed-off-by: Juanjo Alvarez --- bblfsh/pyuast.cc | 88 ++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 4f3bda5..f80e2f5 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -315,29 +315,29 @@ static void PyUastIterExt_dealloc(PyObject *self) { typedef struct { PyObject_HEAD ContextExt *p; -} PyContextExt; +} PythonContextExt; -static void PyContextExt_dealloc(PyObject *self) { - delete(((PyContextExt *)self)->p); +static void PythonContextExt_dealloc(PyObject *self) { + delete(((PythonContextExt *)self)->p); // TODO: delete self? } -// PyContextExt_root returns a root node associated with this context. +// PythonContextExt_root returns a root node associated with this context. // Returns a new reference. 
-static PyObject *PyContextExt_root(PyContextExt *self, PyObject *Py_UNUSED(ignored)) { +static PyObject *PythonContextExt_root(PythonContextExt *self, PyObject *Py_UNUSED(ignored)) { return self->p->RootNode(); } -// PyContextExt_load returns a root node converted to Python object. +// PythonContextExt_load returns a root node converted to Python object. // Returns a new reference. -static PyObject *PyContextExt_load(PyContextExt *self, PyObject *Py_UNUSED(ignored)) { - PyObject* root = PyContextExt_root(self, nullptr); +static PyObject *PythonContextExt_load(PythonContextExt *self, PyObject *Py_UNUSED(ignored)) { + PyObject* root = PythonContextExt_root(self, nullptr); return PyNodeExt_load((PyNodeExt*)root, nullptr); } -// PyContextExt_filter filters UAST. +// PythonContextExt_filter filters UAST. // Returns a new reference. -static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args, PyObject *kwargs) { +static PyObject *PythonContextExt_filter(PythonContextExt *self, PyObject *args, PyObject *kwargs) { char* kwds[] = {(char*)"query", (char*)"node", NULL}; char *query = nullptr; PyObject *node = nullptr; @@ -354,9 +354,9 @@ static PyObject *PyContextExt_filter(PyContextExt *self, PyObject *args, PyObjec return it; } -// PyContextExt_filter serializes UAST. +// PythonContextExt_filter serializes UAST. // Returns a new reference. 
-static PyObject *PyContextExt_encode(PyContextExt *self, PyObject *args) { +static PyObject *PythonContextExt_encode(PythonContextExt *self, PyObject *args) { PyObject *node = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum if (!PyArg_ParseTuple(args, "Oi", &node, &format)) @@ -364,17 +364,17 @@ static PyObject *PyContextExt_encode(PyContextExt *self, PyObject *args) { return self->p->Encode(node, format); } -static PyMethodDef PyContextExt_methods[] = { - {"root", (PyCFunction) PyContextExt_root, METH_NOARGS, +static PyMethodDef PythonContextExt_methods[] = { + {"root", (PyCFunction) PythonContextExt_root, METH_NOARGS, "Return the root node attached to this query context" }, - {"load", (PyCFunction) PyContextExt_load, METH_NOARGS, + {"load", (PyCFunction) PythonContextExt_load, METH_NOARGS, "Load external node to Python" }, - {"filter", (PyCFunction) PyContextExt_filter, METH_VARARGS | METH_KEYWORDS, + {"filter", (PyCFunction) PythonContextExt_filter, METH_VARARGS | METH_KEYWORDS, "Filter a provided UAST with XPath" }, - {"encode", (PyCFunction) PyContextExt_encode, METH_VARARGS, + {"encode", (PyCFunction) PythonContextExt_encode, METH_VARARGS, "Encodes a UAST into a buffer" }, {nullptr} // Sentinel @@ -382,12 +382,12 @@ static PyMethodDef PyContextExt_methods[] = { extern "C" { - static PyTypeObject PyContextExtType = { + static PyTypeObject PythonContextExtType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.ContextExt", // tp_name - sizeof(PyContextExt), // tp_basicsize + sizeof(PythonContextExt), // tp_basicsize 0, // tp_itemsize - PyContextExt_dealloc, // tp_dealloc + PythonContextExt_dealloc, // tp_dealloc 0, // tp_print 0, // tp_getattr 0, // tp_setattr @@ -410,7 +410,7 @@ extern "C" 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - PyContextExt_methods, // tp_methods + PythonContextExt_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, // tp_base @@ -913,18 +913,18 @@ static void 
PyUastIter_dealloc(PyObject *self) { typedef struct { PyObject_HEAD Context *p; -} PyContext; +} PythonContext; -static void PyContext_dealloc(PyObject *self) { - delete(((PyContext *)self)->p); +static void PythonContext_dealloc(PyObject *self) { + delete(((PythonContext *)self)->p); // TODO: delete self? } -static PyObject *PyContext_root(PyContext *self, PyObject *Py_UNUSED(ignored)) { +static PyObject *PythonContext_root(PythonContext *self, PyObject *Py_UNUSED(ignored)) { return self->p->RootNode(); } -static PyObject *PyContext_filter(PyContext *self, PyObject *args, PyObject *kwargs) { +static PyObject *PythonContext_filter(PythonContext *self, PyObject *args, PyObject *kwargs) { char* kwds[] = {(char*)"query", (char*)"node", NULL}; char *query = nullptr; PyObject *node = nullptr; @@ -941,7 +941,7 @@ static PyObject *PyContext_filter(PyContext *self, PyObject *args, PyObject *kwa return it; } -static PyObject *PyContext_encode(PyContext *self, PyObject *args) { +static PyObject *PythonContext_encode(PythonContext *self, PyObject *args) { PyObject *node = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum if (!PyArg_ParseTuple(args, "Oi", &node, &format)) @@ -949,14 +949,14 @@ static PyObject *PyContext_encode(PyContext *self, PyObject *args) { return self->p->Encode(node, format); } -static PyMethodDef PyContext_methods[] = { - {"root", (PyCFunction) PyContext_root, METH_NOARGS, +static PyMethodDef PythonContext_methods[] = { + {"root", (PyCFunction) PythonContext_root, METH_NOARGS, "Return the root node attached to this query context" }, - {"filter", (PyCFunction) PyContext_filter, METH_VARARGS | METH_KEYWORDS, + {"filter", (PyCFunction) PythonContext_filter, METH_VARARGS | METH_KEYWORDS, "Filter a provided UAST with XPath" }, - {"encode", (PyCFunction) PyContext_encode, METH_VARARGS, + {"encode", (PyCFunction) PythonContext_encode, METH_VARARGS, "Encodes a UAST into a buffer" }, {nullptr} // Sentinel @@ -964,12 +964,12 @@ static 
PyMethodDef PyContext_methods[] = { extern "C" { - static PyTypeObject PyContextType = { + static PyTypeObject PythonContextType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.Context", // tp_name - sizeof(PyContext), // tp_basicsize + sizeof(PythonContext), // tp_basicsize 0, // tp_itemsize - PyContext_dealloc, // tp_dealloc + PythonContext_dealloc, // tp_dealloc 0, // tp_print 0, // tp_getattr 0, // tp_setattr @@ -992,7 +992,7 @@ extern "C" 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - PyContext_methods, // tp_methods + PythonContext_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, // tp_base @@ -1028,7 +1028,7 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { return ctx->Iterate(obj, (TreeOrder)order, true); } -static PyObject *PyContextExt_decode(PyObject *self, PyObject *args, PyObject *kwargs) { +static PyObject *PythonContextExt_decode(PyObject *self, PyObject *args, PyObject *kwargs) { char* kwds[] = {(char*)"data", (char*)"format", NULL}; PyObject *obj = nullptr; UastFormat format = UAST_BINARY; // TODO: make it an enum @@ -1047,7 +1047,7 @@ static PyObject *PyContextExt_decode(PyObject *self, PyObject *args, PyObject *k uast::Context* ctx = uast::Decode(ubuf, format); PyBuffer_Release(&buf); - PyContextExt *pyU = PyObject_New(PyContextExt, &PyContextExtType); + PythonContextExt *pyU = PyObject_New(PythonContextExt, &PythonContextExtType); if (!pyU) { delete(ctx); return nullptr; @@ -1056,13 +1056,13 @@ static PyObject *PyContextExt_decode(PyObject *self, PyObject *args, PyObject *k return (PyObject*)pyU; } -static PyObject *PyContext_new(PyObject *self, PyObject *args) { +static PyObject *PythonContext_new(PyObject *self, PyObject *args) { // TODO: optionally accept root object if (!PyArg_ParseTuple(args, "")) { return nullptr; } - PyContext *pyU = PyObject_New(PyContext, &PyContextType); + PythonContext *pyU = PyObject_New(PythonContext, &PythonContextType); if (!pyU) { return 
nullptr; } @@ -1072,13 +1072,13 @@ static PyObject *PyContext_new(PyObject *self, PyObject *args) { bool isContext(PyObject* obj) { if (!obj || obj == Py_None) return false; - return PyObject_TypeCheck(obj, &PyContextExtType) || PyObject_TypeCheck(obj, &PyContextType); + return PyObject_TypeCheck(obj, &PythonContextExtType) || PyObject_TypeCheck(obj, &PythonContextType); } static PyMethodDef extension_methods[] = { {"iterator", PyUastIter_new, METH_VARARGS, "Get an iterator over a node"}, - {"decode", (PyCFunction)PyContextExt_decode, METH_VARARGS | METH_KEYWORDS, "Decode UAST from a byte array"}, - {"uast", PyContext_new, METH_VARARGS, "Creates a new UAST context"}, + {"decode", (PyCFunction)PythonContextExt_decode, METH_VARARGS | METH_KEYWORDS, "Decode UAST from a byte array"}, + {"uast", PythonContext_new, METH_VARARGS, "Creates a new UAST context"}, {nullptr, nullptr, 0, nullptr} }; @@ -1097,11 +1097,11 @@ static struct PyModuleDef module_def = { PyMODINIT_FUNC PyInit_pyuast(void) { - if (PyType_Ready(&PyContextExtType) < 0) return nullptr; + if (PyType_Ready(&PythonContextExtType) < 0) return nullptr; if (PyType_Ready(&PyNodeExtType) < 0) return nullptr; if (PyType_Ready(&PyUastIterExtType) < 0) return nullptr; - if (PyType_Ready(&PyContextType) < 0) return nullptr; + if (PyType_Ready(&PythonContextType) < 0) return nullptr; if (PyType_Ready(&PyUastIterType) < 0) return nullptr; return PyModule_Create(&module_def); } From 2c983a956b57960a5e9e360b30d689b5ad1e7e1c Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 8 Nov 2018 16:35:45 +0100 Subject: [PATCH 26/48] Use same name for Windows an Linux static lib before the extension Signed-off-by: Juanjo Alvarez --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ebf7a91..405ee08 100644 --- a/setup.py +++ b/setup.py @@ -295,7 +295,7 @@ def main(): sys.exit() libraries = [] - static_libraries = ["uast"] + static_libraries = ["libuast"] static_lib_dir = 
j("bblfsh", "libuast") if sys.platform == 'win32': @@ -303,7 +303,7 @@ def main(): libraries.extend(static_libraries) extra_objects = [] else: # POSIX - extra_objects = ['{}/lib{}.a'.format( + extra_objects = ['{}/{}.a'.format( static_lib_dir, l) for l in static_libraries] libuast_module = Extension( From 0bcf223dbc85e4234efa548f2678c9cec03e36dc Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 8 Nov 2018 17:36:16 +0100 Subject: [PATCH 27/48] Add several needed static libs for Windows Signed-off-by: Juanjo Alvarez --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 405ee08..56aff50 100644 --- a/setup.py +++ b/setup.py @@ -295,16 +295,16 @@ def main(): sys.exit() libraries = [] - static_libraries = ["libuast"] static_lib_dir = j("bblfsh", "libuast") + static_libraries = ["{}/libuast".format(static_lib_dir)] if sys.platform == 'win32': # FIXME: untested! libraries.extend(static_libraries) + libraries.extend(["legacy_stdio_definitions", "winmm", "ws2_32"]) extra_objects = [] else: # POSIX - extra_objects = ['{}/{}.a'.format( - static_lib_dir, l) for l in static_libraries] + extra_objects = ['{}.a'.format(l) for l in static_libraries] libuast_module = Extension( "bblfsh.pyuast", From 8401a1c486679ccbb6e5498e997d0dcb7671baee Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Thu, 15 Nov 2018 13:33:44 +0100 Subject: [PATCH 28/48] Several improvements (see desc) - Added static typing. - Fixed some bugs and potential bugs found after adding typing. - Removed the unnused and eval-dangeous mapn cli argument. - Updated run_query so the CLI should work again with queries. - Reworked & simplified `launcher.py` exception handling logic. - PEP8 formatting. - Removed some vestigial, unneded and ugly code manipulating the import path. - Ensure the `build/` directory is deleted on `setup.py --clean`. 
Signed-off-by: Juanjo Alvarez --- bblfsh/__init__.py | 21 ++++++++-------- bblfsh/__main__.py | 63 +++++++++++++++++++++------------------------- bblfsh/aliases.py | 6 ++--- bblfsh/client.py | 40 ++++++++++++++++------------- bblfsh/launcher.py | 53 ++++++++++++++++++++------------------ setup.py | 3 +++ 6 files changed, 97 insertions(+), 89 deletions(-) diff --git a/bblfsh/__init__.py b/bblfsh/__init__.py index 3e89a6e..ded2c1e 100644 --- a/bblfsh/__init__.py +++ b/bblfsh/__init__.py @@ -2,32 +2,31 @@ from bblfsh.pyuast import decode, iterator, uast from bblfsh.aliases import * + class TreeOrder: - PRE_ORDER = 0 - POST_ORDER = 1 - LEVEL_ORDER = 2 + PRE_ORDER = 0 + POST_ORDER = 1 + LEVEL_ORDER = 2 POSITION_ORDER = 3 -# "in" is a reserved keyword in Python thus can't be used as package name, so -# we import by string class RoleSearchException(Exception): pass -def role_id(role_name): +def role_id(rname: str) -> int: try: - name = DESCRIPTOR.enum_types_by_name["Role"].values_by_name[role_name].number + name = DESCRIPTOR.enum_types_by_name["Role"].values_by_name[rname].number except KeyError: - raise RoleSearchException("Role with name '{}' not found".format(role_name)) + raise RoleSearchException("Role with name '{}' not found".format(rname)) return name -def role_name(role_id): +def role_name(rid: int) -> str: try: - id_ = DESCRIPTOR.enum_types_by_name["Role"].values_by_number[role_id].name + id_ = DESCRIPTOR.enum_types_by_name["Role"].values_by_number[rid].name except KeyError: - raise RoleSearchException("Role with ID '{}' not found".format(role_id)) + raise RoleSearchException("Role with ID '{}' not found".format(rid)) return id_ diff --git a/bblfsh/__main__.py b/bblfsh/__main__.py index 02eaded..f9f63e6 100644 --- a/bblfsh/__main__.py +++ b/bblfsh/__main__.py @@ -1,68 +1,63 @@ import argparse +import pprint import sys -from bblfsh.pyuast import filter - from bblfsh.client import BblfshClient from bblfsh.launcher import ensure_bblfsh_is_running -def setup(): +def 
setup() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Query for a UAST to Babelfish and dump it to stdout." ) parser.add_argument("-e", "--endpoint", default="0.0.0.0:9432", - help="bblfsh gRPC endpoint.") + help="bblfsh gRPC endpoint.", type=str) parser.add_argument("-f", "--file", required=True, - help="File to parse.") + help="File to parse.", type=str) parser.add_argument("-l", "--language", default=None, - help="File's language. The default is to autodetect.") + help="File's language. The default is to autodetect.", type=str) parser.add_argument("--disable-bblfsh-autorun", action="store_true", help="Do not automatically launch Babelfish server " "if it is not running.") - parser.add_argument("-q", "--query", default="", help="xpath query") - parser.add_argument("-m", "--mapn", default="", help="transform function of the results (n)") - parser.add_argument("-a", "--array", help='print results as an array', action='store_true') + parser.add_argument("-q", "--query", default="", help="xpath query", type=str) + parser.add_argument("-a", "--array", help='print results as a parseable Python array', action='store_true') + + return parser.parse_args() - args = parser.parse_args() - return args -def run_query(root, query, mapn, as_array): - result = list(filter(root, query)) +def run_query(uast, query: str, array: bool) -> None: + result_iter = uast.filter(query) - if not result: + if not result_iter: print("Nothing found") - else: - if mapn: - result = [eval(mapn) for n in result] + result_list = [x.load() for x in result_iter] - if as_array: - print("results[{}] = {}".format(len(result), result)) - else: - print("Running xpath query: {}".format(query)) - print("FOUND {} roots".format(len(result))) + if array: + pprint.pprint(result_list) + else: + print("%d Results:" % len(result_list)) + for i, node in enumerate(result_list): + print("== {} ==================================".format(i+1)) + print(node) - for i, node in enumerate(result): - 
print("== {} ==================================".format(i+1)) - print(node) -def main(): +def main() -> int: args = setup() if not args.disable_bblfsh_autorun: ensure_bblfsh_is_running() client = BblfshClient(args.endpoint) - response = client.parse(args.file, args.language) - root = response.uast - if len(response.errors): - sys.stderr.write("\n".join(response.errors) + "\n") - query = args.query - if query: - run_query(root, query, args.mapn, args.array) + uast = client.parse(args.file, args.language) + + if args.query: + run_query(uast, args.query, array=args.array) else: - print(root) + pprint.pprint(uast.load()) + + return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 1fe70ec..a3ea10b 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -1,4 +1,6 @@ import importlib +import google +import typing # "in" is a reserved keyword in Python thus can't be used as package name, so # we import by string @@ -9,14 +11,12 @@ protocol_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2") protocol_grpc_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc") -# Node = importlib.import_module( - # "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2").Node - DESCRIPTOR = uast_v2_module.DESCRIPTOR ParseRequest = protocol_v2_module.ParseRequest ParseResponse = protocol_v2_module.ParseResponse ParseError = protocol_v2_module.ParseError Mode = protocol_v2_module.Mode +ModeType = google.protobuf.internal.enum_type_wrapper.EnumTypeWrapper DriverStub = protocol_grpc_v2_module.DriverStub DriverServicer = protocol_grpc_v2_module.DriverServicer diff --git a/bblfsh/client.py b/bblfsh/client.py index 5d4be34..e5c1863 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -1,28 +1,26 @@ import os import sys +import typing as t import grpc from bblfsh.pyuast import decode as uast_decode +from bblfsh.pyuast import uast as uast_ctx -from bblfsh.aliases 
import ParseRequest, DriverStub, ProtocolServiceStub, VersionRequest, SupportedLanguagesRequest - -# The following two insertions fix the broken pb import paths -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "gopkg/in/bblfsh/sdk/v1/protocol")) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "gopkg/in/bblfsh/sdk/v2/protocol")) -sys.path.insert(0, os.path.dirname(__file__)) +from bblfsh.aliases import (ParseRequest, DriverStub, ProtocolServiceStub, + VersionRequest, SupportedLanguagesRequest, ModeType) class NonUTF8ContentException(Exception): pass -class BblfshClient(object): +class BblfshClient: """ - Babelfish gRPC client. Currently it is only capable of fetching UASTs. + Babelfish gRPC client. """ - def __init__(self, endpoint): + def __init__(self, endpoint:str) -> None: """ Initializes a new instance of BblfshClient. @@ -30,26 +28,32 @@ def __init__(self, endpoint): for example "0.0.0.0:9432" :type endpoint: str """ + self._channel = grpc.insecure_channel(endpoint) self._stub_v1 = ProtocolServiceStub(self._channel) self._stub_v2 = DriverStub(self._channel) @staticmethod - def _check_utf8(text): + def _ensure_utf8(text: bytes) -> str: try: - text.decode("utf-8") + return text.decode("utf-8") except UnicodeDecodeError: raise NonUTF8ContentException("Content must be UTF-8, ASCII or Base64 encoded") @staticmethod - def _get_contents(contents, filename): + def _get_contents(contents: t.Optional[t.Union[str, bytes]], filename: str) -> str: if contents is None: with open(filename, "rb") as fin: contents = fin.read() - BblfshClient._check_utf8(contents) + + if isinstance(contents, bytes): + contents = BblfshClient._ensure_utf8(contents) + return contents - def parse(self, filename, language=None, contents=None, mode=None, raw=False, timeout=None): + def parse(self, filename: str, language: t.Optional[str]=None, + contents: t.Optional[str]=None, mode: t.Optional[ModeType]=None, + raw: bool=False, timeout: t.Optional[int]=None) -> uast_ctx: """ 
Queries the Babelfish server and receives the UAST response for the specified file. @@ -81,16 +85,18 @@ def parse(self, filename, language=None, contents=None, mode=None, raw=False, ti TODO: return detected language TODO: handle syntax errors """ + if raw: return response.uast + ctx = uast_decode(response.uast, format=0) return ctx - def supported_languages(self): + def supported_languages(self) -> t.List[str]: sup_response = self._stub_v1.SupportedLanguages(SupportedLanguagesRequest()) return sup_response.languages - def version(self): + def version(self) -> str: """ Queries the Babelfish server for version and runtime information. @@ -100,7 +106,7 @@ def version(self): return self._stub_v1.Version(VersionRequest()) @staticmethod - def _scramble_language(lang): + def _scramble_language(lang: t.Optional[str]) -> t.Optional[str]: if lang is None: return None lang = lang.lower() diff --git a/bblfsh/launcher.py b/bblfsh/launcher.py index 32b3c61..c4d1f3b 100644 --- a/bblfsh/launcher.py +++ b/bblfsh/launcher.py @@ -5,29 +5,26 @@ import docker -def ensure_bblfsh_is_running(): - log = logging.getLogger("bblfsh") - try: - client = docker.from_env(version="auto") - except docker.errors.DockerException as e: - log.warning("Failed to connect to the Docker daemon and ensure " - "that the Babelfish server is running. 
%s", e) - return False +def after_container_start(cont: docker.models.resource.Model, log: logging.Logger) -> None: + log.warning( + "Launched the Babelfish server (name bblfshd, id %s).\nStop it " + "with: docker rm -f bblfshd", cont.id) + + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + result = -1 + while result != 0: + time.sleep(0.1) + result = sock.connect_ex(("0.0.0.0", 9432)) - def after_start(container): - log.warning( - "Launched the Babelfish server (name bblfshd, id %s).\nStop it " - "with: docker rm -f bblfshd", container.id) - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: - result = -1 - while result != 0: - time.sleep(0.1) - result = sock.connect_ex(("0.0.0.0", 9432)) - log.warning("Babelfish server is up and running.") - log.info("Installing Python driver") - container.exec_run("bblfshctl driver install python bblfsh/python-driver:latest") + log.warning("Babelfish server is up and running.") + log.info("Installing Python driver") + cont.exec_run("bblfshctl driver install python bblfsh/python-driver:latest") + +def ensure_bblfsh_is_running() -> bool: + log = logging.getLogger("bblfsh") try: + client = docker.from_env(version="auto") container = client.containers.get("bblfshd") if container.status != "running": try: @@ -36,20 +33,28 @@ def after_start(container): log.warning("Failed to start the existing bblfshd container: " "%s: %s", type(e).__name__, e) else: - after_start(container) + after_container_start(container, log) return False - return True + except docker.errors.DockerException as e: + log.warning("Failed to connect to the Docker daemon and ensure " + "that the Babelfish server is running. 
%s", e) + return False + except AttributeError: log.error("You hit https://github.com/docker/docker-py/issues/1353\n" "Uninstall docker-py and docker and install *only* docker.\n" "Failed to ensure that the Babelfish server is running.") return False + except docker.errors.NotFound: container = client.containers.run( - "bblfsh/bblfshd", name="bblfshd", detach=True, privileged=True, + "bblfsh/bblfshd", name="bblfshd", detach=True, privileged=True, ports={9432: 9432} ) - after_start(container) + after_container_start(container, log) return False + finally: client.api.close() + + return True diff --git a/setup.py b/setup.py index 56aff50..df14217 100644 --- a/setup.py +++ b/setup.py @@ -273,6 +273,7 @@ def do_get_deps(): def clean(): + rimraf("build") rimraf("gopkg.in") rimraf(j("bblfsh", "github")) rimraf(j("bblfsh", "gopkg")) @@ -285,6 +286,8 @@ def main(): # using the installed uast library if "--log" in sys.argv: logging.basicConfig(level=logging.INFO) + else: + logging.basicConfig(level=logging.ERROR) if "--getdeps" in sys.argv: do_get_deps() From bd8c2d573fc5176b38949c70a742fa56218861c2 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 16 Nov 2018 16:07:34 +0100 Subject: [PATCH 29/48] Several Improvements (II) - Wrap the extension module values (results, iterator, item) into Python classes with support for automatic calls to `load()` on str serializing and methods to ensure a query result is of a specific type. - Extension module: export the types returned too (needed for correctly typing): NodeExt, IteratorExt and Context. 
Signed-off-by: Juanjo Alvarez --- bblfsh/__main__.py | 6 +-- bblfsh/client.py | 22 +++------- bblfsh/pyuast.cc | 13 +++++- bblfsh/result_context.py | 95 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 21 deletions(-) create mode 100644 bblfsh/result_context.py diff --git a/bblfsh/__main__.py b/bblfsh/__main__.py index f9f63e6..aef9a62 100644 --- a/bblfsh/__main__.py +++ b/bblfsh/__main__.py @@ -49,12 +49,12 @@ def main() -> int: ensure_bblfsh_is_running() client = BblfshClient(args.endpoint) - uast = client.parse(args.file, args.language) + ctx = client.parse(args.file, args.language) if args.query: - run_query(uast, args.query, array=args.array) + run_query(ctx, args.query, array=args.array) else: - pprint.pprint(uast.load()) + pprint.pprint(ctx.load()) return 0 diff --git a/bblfsh/client.py b/bblfsh/client.py index e5c1863..340e8e0 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -1,14 +1,11 @@ import os -import sys import typing as t import grpc -from bblfsh.pyuast import decode as uast_decode -from bblfsh.pyuast import uast as uast_ctx - from bblfsh.aliases import (ParseRequest, DriverStub, ProtocolServiceStub, VersionRequest, SupportedLanguagesRequest, ModeType) +from bblfsh.result_context import ResultContext class NonUTF8ContentException(Exception): @@ -53,7 +50,7 @@ def _get_contents(contents: t.Optional[t.Union[str, bytes]], filename: str) -> s def parse(self, filename: str, language: t.Optional[str]=None, contents: t.Optional[str]=None, mode: t.Optional[ModeType]=None, - raw: bool=False, timeout: t.Optional[int]=None) -> uast_ctx: + timeout: t.Optional[int]=None) -> ResultContext: """ Queries the Babelfish server and receives the UAST response for the specified file. @@ -75,22 +72,13 @@ def parse(self, filename: str, language: t.Optional[str]=None, :return: UAST object. 
""" + # TODO: handle syntax errors contents = self._get_contents(contents, filename) request = ParseRequest(filename=os.path.basename(filename), - content=contents, - mode=mode, + content=contents, mode=mode, language=self._scramble_language(language)) response = self._stub_v2.Parse(request, timeout=timeout) - """ - TODO: return detected language - TODO: handle syntax errors - """ - - if raw: - return response.uast - - ctx = uast_decode(response.uast, format=0) - return ctx + return ResultContext(response) def supported_languages(self) -> t.List[str]: sup_response = self._stub_v1.SupportedLanguages(SupportedLanguagesRequest()) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index f80e2f5..fe5f7ff 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -1103,5 +1103,16 @@ PyInit_pyuast(void) if (PyType_Ready(&PythonContextType) < 0) return nullptr; if (PyType_Ready(&PyUastIterType) < 0) return nullptr; - return PyModule_Create(&module_def); + + PyObject* m = PyModule_Create(&module_def); + + Py_INCREF(&PythonContextType); + PyModule_AddObject(m, "Context", (PyObject *)&PythonContextType); + + Py_INCREF(&PyNodeExtType); + PyModule_AddObject(m, "NodeExt", (PyObject *)&PyNodeExtType); + + Py_INCREF(&PyUastIterExtType); + PyModule_AddObject(m, "IteratorExt", (PyObject *)&PyUastIterExtType); + return m; } diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py new file mode 100644 index 0000000..2f4fabd --- /dev/null +++ b/bblfsh/result_context.py @@ -0,0 +1,95 @@ +import typing as t + +from bblfsh.aliases import ParseResponse + +from bblfsh.pyuast import decode, IteratorExt, NodeExt + + +class ResponseError(Exception): + pass + + +class ResultTypeException(Exception): + pass + + +ResultMultiType = t.NewType("ResultType", t.Union[dict, int, float, bool, str]) + + +class FilterItem: + def __init__(self, node_ext: NodeExt) -> None: + self._node_ext = node_ext + self._loaded_node: t.Optional[ResultMultiType] = None + + def _ensure_load(self): + if 
self._loaded_node is None: + self._loaded_node = self._node_ext.load() + + def __str__(self): + return str(self.get()) + + def __repr__(self): + return repr(self.get()) + + def get(self) -> ResultMultiType: + self._ensure_load() + return self._loaded_node + + def _get_typed(self, type_: type) -> ResultMultiType: + self._ensure_load() + if not isinstance(self._loaded_node, type_): + raise ResultTypeException("Expected {} result, but type is '{}'" + .format(type_.__name__, type(self._loaded_node))) + + def get_bool(self) -> bool: + return self._get_typed(bool) + + def get_float(self) -> float: + res = self._get_typed(float) + if isinstance(res, int): + res = float(res) + return res + + def get_int(self) -> int: + return self._get_typed(int) + + def get_str(self) -> str: + return self._get_typed(str) + + def get_dict(self) -> dict: + return self._get_typed(dict) + + +class FilterResults: + def __init__(self, iter_ext: IteratorExt) -> None: + self._iter_ext = iter_ext + + def __iter__(self) -> object: + return self + + def __next__(self) -> FilterItem: + return FilterItem(next(self._iter_ext)) + + +class ResultContext: + def __init__(self, grpc_response: ParseResponse) -> None: + if grpc_response.errors: + raise ResponseError("\n".join( + [error.text for error in grpc_response.errors]) + ) + + self._response = grpc_response + self._ctx = decode(grpc_response.uast, format=0) + self.language = grpc_response.language + + def filter(self, query: str) -> FilterResults: + return FilterResults(self._ctx.filter(query)) + + def get_all(self) -> dict: + return self._ctx.load() + + def __str__(self) -> str: + return str(self.get_all()) + + def __repr__(self) -> str: + return repr(self.get_all()) From f964c4639c4acacacd7294ea72dfd95bffa0e88c Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Fri, 16 Nov 2018 18:27:15 +0100 Subject: [PATCH 30/48] Make iterators great (and working) again Wraps the internal iterator function from the extension module into methods for ResultContext, 
NodeIterator and Node objects (same objects added in the previous commit, renamed). Update README. Please see examples for specific usage. Signed-off-by: Juanjo Alvarez --- README.md | 49 ++++++++++++++++++++++++++++++---------- bblfsh/__init__.py | 9 +------- bblfsh/pyuast.cc | 4 ++++ bblfsh/result_context.py | 37 ++++++++++++++++++++++-------- bblfsh/tree_order.py | 12 ++++++++++ 5 files changed, 82 insertions(+), 29 deletions(-) create mode 100644 bblfsh/tree_order.py diff --git a/README.md b/README.md index c3d950b..f058b04 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,10 @@ pip install bblfsh ```bash git clone https://github.com/bblfsh/client-python.git cd client-python +pip install -r requirements.txt +python setup.py --getdeps python setup.py install +# or: pip install . ``` ### Dependencies @@ -48,23 +51,45 @@ Please, read the [getting started](https://doc.bblf.sh/using-babelfish/getting-s import bblfsh client = bblfsh.BblfshClient("0.0.0.0:9432") -uast = client.parse("/path/to/file.py") -print(uast.load()) +ctx = client.parse("/path/to/file.py") +print(ctx) +# or to get the results in a dictionary: +resdict = ctx.get_all() # "filter' allows you to use XPath queries to filter on result nodes: -it = uast.filter("//Import[@role='Import' and @role='Declaration']//alias") +it = ctx.filter("//python:Call") for node in it: - print(node.load()) + print(node) + # or: + doSomething(node.get()) # filter must be used when using XPath functions returning these types: -print(uast.filter("boolean(//*[@strtOffset or @endOffset])")) -print(uast.filter("name(//*[1])")) -print(uast.filter("count(//*)")) - -# You can also iterate on several tree iteration orders: -it = bblfsh.iterator(uast, bblfsh.TreeOrder.PRE_ORDER) -for node in it: - print(node.internal_type) +# XPath queries can return different types (dicts, int, float, bool or str), +# calling get() with an item will return the right type, but if you must ensure +# that you are getting the expected type (to avoid 
errors in the queries) there +# are alterative typed versions: +x = next(ctx.filter("boolean(//*[@strtOffset or @endOffset])").get_bool() +y = next(ctx.filter("name(//*[1])")).get_str() +z = next(ctx.filter("count(//*)").get_int() # or get_float() + +# You can also iterate using iteration orders different than the +# default preorder using the `iterate` method on `parse` result or node objects: + +# Directly over parse results +iter = client.parse("/path/to/file.py").iterate(bblfsh.TreeOrder.POST_ORDER) +for i in iter: ... + +# Over filter results (which by default are already iterators with PRE_ORDER): +ctx = client.parse("file.py") +newiter = ctx.filter("//python:Call").iterate(bblfsh.TreeOrder.LEVEL_ORDER) +for i in newiter: ... + +# Over individual node objects to change the iteration order of +# a specific subtree: +ctx = client.parse("file.py") +first_node = next(ctx) +newiter = first_node.iterate(bblfsh.TreeOrder.POSITION_ORDER) +for i in newiter: ... ``` Please read the [Babelfish clients](https://doc.bblf.sh/using-babelfish/clients.html) diff --git a/bblfsh/__init__.py b/bblfsh/__init__.py index ded2c1e..5fa5cd1 100644 --- a/bblfsh/__init__.py +++ b/bblfsh/__init__.py @@ -1,19 +1,12 @@ from bblfsh.client import BblfshClient from bblfsh.pyuast import decode, iterator, uast +from bblfsh.tree_order import TreeOrder from bblfsh.aliases import * -class TreeOrder: - PRE_ORDER = 0 - POST_ORDER = 1 - LEVEL_ORDER = 2 - POSITION_ORDER = 3 - - class RoleSearchException(Exception): pass - def role_id(rname: str) -> int: try: name = DESCRIPTOR.enum_types_by_name["Role"].values_by_name[rname].number diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index fe5f7ff..d013cce 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -1114,5 +1114,9 @@ PyInit_pyuast(void) Py_INCREF(&PyUastIterExtType); PyModule_AddObject(m, "IteratorExt", (PyObject *)&PyUastIterExtType); + + Py_INCREF(&PyUastIterType); + PyModule_AddObject(m, "Iterator", (PyObject *)&PyUastIterType); + return m; 
} diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py index 2f4fabd..2b63e00 100644 --- a/bblfsh/result_context.py +++ b/bblfsh/result_context.py @@ -1,8 +1,8 @@ import typing as t from bblfsh.aliases import ParseResponse - -from bblfsh.pyuast import decode, IteratorExt, NodeExt +from bblfsh.pyuast import decode, IteratorExt, NodeExt, iterator +from bblfsh.tree_order import TreeOrder class ResponseError(Exception): @@ -13,10 +13,14 @@ class ResultTypeException(Exception): pass +class NotNodeIterationException(Exception): + pass + + ResultMultiType = t.NewType("ResultType", t.Union[dict, int, float, bool, str]) -class FilterItem: +class Node: def __init__(self, node_ext: NodeExt) -> None: self._node_ext = node_ext self._loaded_node: t.Optional[ResultMultiType] = None @@ -59,16 +63,27 @@ def get_str(self) -> str: def get_dict(self) -> dict: return self._get_typed(dict) + def iterate(self, order) -> 'NodeIterator': + if not isinstance(self._node_ext, NodeExt): + raise NotNodeIterationException("Cannot iterate over leaf of type '{}'" + .format(type(self._node_ext))) + TreeOrder.check_order(order) + return NodeIterator(iterator(self._node_ext, order)) -class FilterResults: + +class NodeIterator: def __init__(self, iter_ext: IteratorExt) -> None: self._iter_ext = iter_ext - def __iter__(self) -> object: + def __iter__(self) -> 'NodeIterator': return self - def __next__(self) -> FilterItem: - return FilterItem(next(self._iter_ext)) + def __next__(self) -> Node: + return Node(next(self._iter_ext)) + + def iterate(self, order) -> 'NodeIterator': + TreeOrder.check_order(order) + return NodeIterator(iterator(next(self._iter_ext), order)) class ResultContext: @@ -82,12 +97,16 @@ def __init__(self, grpc_response: ParseResponse) -> None: self._ctx = decode(grpc_response.uast, format=0) self.language = grpc_response.language - def filter(self, query: str) -> FilterResults: - return FilterResults(self._ctx.filter(query)) + def filter(self, query: str) -> NodeIterator: 
+ return NodeIterator(self._ctx.filter(query)) def get_all(self) -> dict: return self._ctx.load() + def iterate(self, order: int) -> NodeIterator: + TreeOrder.check_order(order) + return NodeIterator(iterator(self._ctx.root(), order)) + def __str__(self) -> str: return str(self.get_all()) diff --git a/bblfsh/tree_order.py b/bblfsh/tree_order.py new file mode 100644 index 0000000..1d60c5f --- /dev/null +++ b/bblfsh/tree_order.py @@ -0,0 +1,12 @@ +class TreeOrder: + _MIN = 0 + PRE_ORDER = 0 + POST_ORDER = 1 + LEVEL_ORDER = 2 + POSITION_ORDER = 3 + _MAX = 3 + + @staticmethod + def check_order(order: int) -> None: + if order < TreeOrder._MIN or order > TreeOrder._MAX: + raise Exception("Wrong order value") From ea7d6158ae2b3f243848b1b449a51f23c6e0f652 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Wed, 5 Dec 2018 02:59:20 +0200 Subject: [PATCH 31/48] fix usage of parsed string arguments in filter Signed-off-by: Denys Smirnov --- bblfsh/pyuast.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index d013cce..da420e8 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -269,7 +269,7 @@ class ContextExt { // Filter queries an external UAST. // Borrows the reference. - PyObject* Filter(PyObject* node, char* query){ + PyObject* Filter(PyObject* node, const char* query){ if (!assertNotContext(node)) { return nullptr; } @@ -339,7 +339,7 @@ static PyObject *PythonContextExt_load(PythonContextExt *self, PyObject *Py_UNUS // Returns a new reference. 
static PyObject *PythonContextExt_filter(PythonContextExt *self, PyObject *args, PyObject *kwargs) { char* kwds[] = {(char*)"query", (char*)"node", NULL}; - char *query = nullptr; + const char *query = nullptr; PyObject *node = nullptr; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) return nullptr; @@ -350,7 +350,6 @@ static PyObject *PythonContextExt_filter(PythonContextExt *self, PyObject *args, } catch (const std::exception& e) { PyErr_SetString(PyExc_RuntimeError, e.what()); } - // TODO: freeing the query leads to a segfault; need to clarify why return it; } @@ -926,7 +925,7 @@ static PyObject *PythonContext_root(PythonContext *self, PyObject *Py_UNUSED(ign static PyObject *PythonContext_filter(PythonContext *self, PyObject *args, PyObject *kwargs) { char* kwds[] = {(char*)"query", (char*)"node", NULL}; - char *query = nullptr; + const char *query = nullptr; PyObject *node = nullptr; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", kwds, &query, &node)) return nullptr; @@ -937,7 +936,6 @@ static PyObject *PythonContext_filter(PythonContext *self, PyObject *args, PyObj } catch (const std::exception& e) { PyErr_SetString(PyExc_RuntimeError, e.what()); } - // TODO: freeing the query leads to a segfault; need to clarify why return it; } From 1c73766e9e969f260cbcdc60773a4a0c68146c03 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Wed, 5 Dec 2018 03:06:40 +0200 Subject: [PATCH 32/48] properly deallocate python objects Signed-off-by: Denys Smirnov --- bblfsh/pyuast.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index da420e8..76b1d2d 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -310,6 +310,7 @@ static void PyUastIterExt_dealloc(PyObject *self) { } it->freeCtx = false; it->ctx = nullptr; + Py_TYPE(self)->tp_free(self); } typedef struct { @@ -319,7 +320,7 @@ typedef struct { static void PythonContextExt_dealloc(PyObject *self) { delete(((PythonContextExt 
*)self)->p); - // TODO: delete self? + Py_TYPE(self)->tp_free(self); } // PythonContextExt_root returns a root node associated with this context. @@ -907,6 +908,7 @@ static void PyUastIter_dealloc(PyObject *self) { } it->freeCtx = false; it->ctx = nullptr; + Py_TYPE(self)->tp_free(self); } typedef struct { @@ -916,7 +918,7 @@ typedef struct { static void PythonContext_dealloc(PyObject *self) { delete(((PythonContext *)self)->p); - // TODO: delete self? + Py_TYPE(self)->tp_free(self); } static PyObject *PythonContext_root(PythonContext *self, PyObject *Py_UNUSED(ignored)) { From 7cb563a575eea5bbf9fb2ad3d38eab49a46d1823 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Wed, 5 Dec 2018 03:29:41 +0200 Subject: [PATCH 33/48] free encoding buffer Signed-off-by: Denys Smirnov --- bblfsh/pyuast.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 76b1d2d..7226808 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -13,7 +13,9 @@ // so we pass ownership to these lists and free them at the end of filter() PyObject* asPyBuffer(uast::Buffer buf) { - return PyByteArray_FromStringAndSize((const char*)(buf.ptr), buf.size); + PyObject* arr = PyByteArray_FromStringAndSize((const char*)(buf.ptr), buf.size); + free(buf.ptr); + return arr; //return PyMemoryView_FromMemory((char*)(buf.ptr), buf.size, PyBUF_READ); } @@ -881,7 +883,7 @@ class Context { return nullptr; } uast::Buffer data = ctx->Encode(toNode(node), format); - return asPyBuffer(data); // TODO: this probably won't deallocate the buffer + return asPyBuffer(data); } PyObject* LoadFrom(PyNodeExt *src) { auto sctx = src->ctx->ctx; From cd1d90d8a45c4c5f128e65e4ac2dc2d893cf3fc3 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Wed, 5 Dec 2018 03:30:04 +0200 Subject: [PATCH 34/48] bump versions Signed-off-by: Denys Smirnov --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index df14217..26bf45e 100644 --- 
a/setup.py +++ b/setup.py @@ -13,11 +13,11 @@ from setuptools.command.build_ext import build_ext VERSION = "3.0.0" -LIBUAST_VERSION = "v3.0.0-rc3" +LIBUAST_VERSION = "v3.1.0" LIBUAST_ARCH = "linux-amd64" SDK_V1_VERSION = "v1.16.1" SDK_V1_MAJOR = SDK_V1_VERSION.split('.')[0] -SDK_V2_VERSION = "v2.5.0" +SDK_V2_VERSION = "v2.12.0" SDK_V2_MAJOR = SDK_V2_VERSION.split('.')[0] FORMAT_ARGS = globals() From a55abc4b24c03f972409b7933ecf181a7ae62b7d Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 11 Dec 2018 11:57:17 +0100 Subject: [PATCH 35/48] Unittests and other fixes Add test file Signed-off-by: Juanjo Alvarez --- README.md | 4 + bblfsh/__init__.py | 1 + bblfsh/aliases.py | 9 +- bblfsh/client.py | 5 +- bblfsh/fixtures/test.py | 322 ++++++++++++++++++++++++++++ bblfsh/result_context.py | 49 +++-- bblfsh/test.py | 451 +++++++++++++++++++++------------------ setup.py | 1 - 8 files changed, 612 insertions(+), 230 deletions(-) create mode 100644 bblfsh/fixtures/test.py diff --git a/README.md b/README.md index f058b04..d584366 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,10 @@ ctx = client.parse("file.py") first_node = next(ctx) newiter = first_node.iterate(bblfsh.TreeOrder.POSITION_ORDER) for i in newiter: ... 
+ +# You can also get the non semantic UAST or native AST: +ctx = client.parse("file.py", mode=bblfsh.ModeDict["NATIVE"]) +# Possible values for ModeDict: DEFAULT_MODE, NATIVE, PREPROCESSED, ANNOTATED, SEMANTIC ``` Please read the [Babelfish clients](https://doc.bblf.sh/using-babelfish/clients.html) diff --git a/bblfsh/__init__.py b/bblfsh/__init__.py index 5fa5cd1..c7e4274 100644 --- a/bblfsh/__init__.py +++ b/bblfsh/__init__.py @@ -7,6 +7,7 @@ class RoleSearchException(Exception): pass + def role_id(rname: str) -> int: try: name = DESCRIPTOR.enum_types_by_name["Role"].values_by_name[rname].number diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index a3ea10b..61b6aca 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -1,10 +1,8 @@ import importlib import google -import typing # "in" is a reserved keyword in Python thus can't be used as package name, so # we import by string - uast_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2") protocol_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2") protocol_grpc_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc") @@ -17,10 +15,17 @@ ParseError = protocol_v2_module.ParseError Mode = protocol_v2_module.Mode ModeType = google.protobuf.internal.enum_type_wrapper.EnumTypeWrapper + +# Current values: {'DEFAULT_MODE': 0, 'NATIVE': 1, 'PREPROCESSED': 2, 'ANNOTATED': 4, 'SEMANTIC': 8} +ModeDict = {} +for k, v in Mode.DESCRIPTOR.values_by_name.items(): + ModeDict[k] = v.number + DriverStub = protocol_grpc_v2_module.DriverStub DriverServicer = protocol_grpc_v2_module.DriverServicer VersionRequest = protocol_v1_module.VersionRequest +VersionResponse = protocol_v1_module.VersionResponse SupportedLanguagesRequest = protocol_v1_module.SupportedLanguagesRequest SupportedLanguagesResponse = protocol_v1_module.SupportedLanguagesResponse ProtocolServiceStub = protocol_grpc_v1_module.ProtocolServiceStub diff 
--git a/bblfsh/client.py b/bblfsh/client.py index 340e8e0..48b888f 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -4,7 +4,8 @@ import grpc from bblfsh.aliases import (ParseRequest, DriverStub, ProtocolServiceStub, - VersionRequest, SupportedLanguagesRequest, ModeType) + VersionRequest, SupportedLanguagesRequest, ModeType, + VersionResponse) from bblfsh.result_context import ResultContext @@ -84,7 +85,7 @@ def supported_languages(self) -> t.List[str]: sup_response = self._stub_v1.SupportedLanguages(SupportedLanguagesRequest()) return sup_response.languages - def version(self) -> str: + def version(self) -> VersionResponse: """ Queries the Babelfish server for version and runtime information. diff --git a/bblfsh/fixtures/test.py b/bblfsh/fixtures/test.py new file mode 100644 index 0000000..943c193 --- /dev/null +++ b/bblfsh/fixtures/test.py @@ -0,0 +1,322 @@ +import os +import resource +import unittest + +import docker + +from bblfsh import (BblfshClient, iterator, role_id, + role_name, ParseResponse, TreeOrder) +from bblfsh.launcher import ensure_bblfsh_is_running +from bblfsh.client import NonUTF8ContentException +from bblfsh.result_context import Node, NodeIterator + + +class BblfshTests(unittest.TestCase): + BBLFSH_SERVER_EXISTED = None + + @classmethod + def setUpClass(cls): + cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running() + + @classmethod + def tearDownClass(cls): + if not cls.BBLFSH_SERVER_EXISTED: + client = docker.from_env(version="auto") + client.containers.get("bblfshd").remove(force=True) + client.api.close() + + def setUp(self): + self.client = BblfshClient("0.0.0.0:9432") + + def testVersion(self): + version = self.client.version() + self.assertTrue(hasattr(version, "version")) + self.assertTrue(version.version) + self.assertTrue(hasattr(version, "build")) + self.assertTrue(version.build) + + # def testNativeParse(self): + # reply = self.client.native_parse(__file__) + # assert(reply.ast) + # + def testNonUTF8ParseError(self): + 
self.assertRaises(NonUTF8ContentException, + self.client.parse, "", "Python", b"a = '\x80abc'") + # + def testUASTDefaultLanguage(self): + self._validate_ctx(self.client.parse(__file__)) + + def testUASTPython(self): + ctx = self.client.parse(__file__, language="Python") + self._validate_ctx(ctx) + self.assertEqual(ctx.language, "python") + + def testUASTFileContents(self): + with open(__file__, "rb") as fin: + contents = fin.read() + ctx = self.client.parse("file.py", contents=contents) + self._validate_ctx(ctx) + self._validate_filter(ctx) + # + # def testBrokenFilter(self): + # self.assertRaises(RuntimeError, filter, 0, "foo") + # + # def testFilterInternalType(self): + # node = Node() + # node.internal_type = 'a' + # self.assertTrue(any(filter(node, "//a"))) + # self.assertFalse(any(filter(node, "//b"))) + # + # def testFilterToken(self): + # node = Node() + # node.token = 'a' + # self.assertTrue(any(filter(node, "//*[@token='a']"))) + # self.assertFalse(any(filter(node, "//*[@token='b']"))) + # + # def testFilterRoles(self): + # node = Node() + # node.roles.append(1) + # self.assertTrue(any(filter(node, "//*[@roleIdentifier]"))) + # self.assertFalse(any(filter(node, "//*[@roleQualified]"))) + # + # def testFilterProperties(self): + # node = Node() + # node.properties['k1'] = 'v2' + # node.properties['k2'] = 'v1' + # self.assertTrue(any(filter(node, "//*[@k2='v1']"))) + # self.assertTrue(any(filter(node, "//*[@k1='v2']"))) + # self.assertFalse(any(filter(node, "//*[@k1='v1']"))) + # + # def testFilterStartOffset(self): + # node = Node() + # node.start_position.offset = 100 + # self.assertTrue(any(filter(node, "//*[@startOffset=100]"))) + # self.assertFalse(any(filter(node, "//*[@startOffset=10]"))) + # + # def testFilterStartLine(self): + # node = Node() + # node.start_position.line = 10 + # self.assertTrue(any(filter(node, "//*[@startLine=10]"))) + # self.assertFalse(any(filter(node, "//*[@startLine=100]"))) + # + # def testFilterStartCol(self): + # node = 
Node() + # node.start_position.col = 50 + # self.assertTrue(any(filter(node, "//*[@startCol=50]"))) + # self.assertFalse(any(filter(node, "//*[@startCol=5]"))) + # + # def testFilterEndOffset(self): + # node = Node() + # node.end_position.offset = 100 + # self.assertTrue(any(filter(node, "//*[@endOffset=100]"))) + # self.assertFalse(any(filter(node, "//*[@endOffset=10]"))) + # + # def testFilterEndLine(self): + # node = Node() + # node.end_position.line = 10 + # self.assertTrue(any(filter(node, "//*[@endLine=10]"))) + # self.assertFalse(any(filter(node, "//*[@endLine=100]"))) + # + # def testFilterEndCol(self): + # node = Node() + # node.end_position.col = 50 + # self.assertTrue(any(filter(node, "//*[@endCol=50]"))) + # self.assertFalse(any(filter(node, "//*[@endCol=5]"))) + # + # def testFilterBool(self): + # node = Node() + # self.assertTrue(filter_bool(node, "boolean(//*[@startOffset or @endOffset])")) + # self.assertFalse(filter_bool(node, "boolean(//*[@blah])")) + # + # def testFilterNumber(self): + # node = Node() + # node.children.extend([Node(), Node(), Node()]) + # self.assertEqual(int(filter_number(node, "count(//*)")), 4) + # + # def testFilterString(self): + # node = Node() + # node.internal_type = "test" + # self.assertEqual(filter_string(node, "name(//*[1])"), "test") + # + # def testFilterBadQuery(self): + # node = Node() + # self.assertRaises(RuntimeError, filter, node, "//*roleModule") + # + # def testFilterBadType(self): + # node = Node() + # node.end_position.col = 50 + # self.assertRaises(RuntimeError, filter, node, "boolean(//*[@startPosition or @endPosition])") + # + # def testRoleIdName(self): + # self.assertEqual(role_id(role_name(1)), 1) + # self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") + # + # def _itTestTree(self): + # root = Node() + # root.internal_type = 'root' + # root.start_position.offset = 0 + # root.start_position.line = 0 + # root.start_position.col = 1 + # + # son1 = Node() + # son1.internal_type = 'son1' + # 
son1.start_position.offset = 1 + # + # son1_1 = Node() + # son1_1.internal_type = 'son1_1' + # son1_1.start_position.offset = 10 + # + # son1_2 = Node() + # son1_2.internal_type = 'son1_2' + # son1_2.start_position.offset = 10 + # + # son1.children.extend([son1_1, son1_2]) + # + # son2 = Node() + # son2.internal_type = 'son2' + # son2.start_position.offset = 100 + # + # son2_1 = Node() + # son2_1.internal_type = 'son2_1' + # son2_1.start_position.offset = 5 + # + # son2_2 = Node() + # son2_2.internal_type = 'son2_2' + # son2_2.start_position.offset = 15 + # + # son2.children.extend([son2_1, son2_2]) + # root.children.extend([son1, son2]) + # + # return root + # + # def testIteratorPreOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.PRE_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['root', 'son1', 'son1_1', 'son1_2', + # 'son2', 'son2_1', 'son2_2']) + # + # def testIteratorPostOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.POST_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1', + # 'son2_2', 'son2', 'root']) + # + # def testIteratorLevelOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.LEVEL_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['root', 'son1', 'son2', 'son1_1', + # 'son1_2', 'son2_1', 'son2_2']) + # + # def testIteratorPositionOrder(self): + # root = self._itTestTree() + # it = iterator(root, TreeOrder.POSITION_ORDER) + # self.assertIsNotNone(it) + # expanded = [node.internal_type for node in it] + # self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1', + # 'son1_2', 'son2_2', 'son2']) + # + def _validate_ctx(self, ctx): + import bblfsh + self.assertIsNotNone(ctx) + self.assertIsInstance(ctx, 
bblfsh.result_context.ResultContext) + self.assertIsInstance(ctx.uast, bytes) + + # def testFilterInsideIter(self): + # root = self.client.parse(__file__).uast + # it = iterator(root, TreeOrder.PRE_ORDER) + # self.assertIsNotNone(it) + # for n in it: + # filter(n, "//*[@roleIdentifier]") + # + # def testItersMixingIterations(self): + # root = self.client.parse(__file__).uast + # it = iterator(root, TreeOrder.PRE_ORDER) + # next(it); next(it); next(it) + # n = next(it) + # it2 = iterator(n, TreeOrder.PRE_ORDER) + # next(it2) + # assert(next(it) == next(it2)) + # + # def testManyFilters(self): + # root = self.client.parse(__file__).uast + # root.properties['k1'] = 'v2' + # root.properties['k2'] = 'v1' + # + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(500): + # filter(root, "//*[@roleIdentifier]") + # + # after = resource.getrusage(resource.RUSAGE_SELF) + # + # # Check that memory usage has not doubled after running the filter + # self.assertLess(after[2] / before[2], 2.0) + # + # def testManyParses(self): + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(100): + # root = self.client.parse(__file__).uast + # root.properties['k1'] = 'v2' + # root.properties['k2'] = 'v1' + # + # after = resource.getrusage(resource.RUSAGE_SELF) + # + # # Check that memory usage has not doubled after running the parse+filter + # self.assertLess(after[2] / before[2], 2.0) + # + # def testManyParsersAndFilters(self): + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(100): + # root = self.client.parse(__file__).uast + # root.properties['k1'] = 'v2' + # root.properties['k2'] = 'v1' + # + # filter(root, "//*[@roleIdentifier]") + # + # after = resource.getrusage(resource.RUSAGE_SELF) + # + # # Check that memory usage has not doubled after running the parse+filter + # self.assertLess(after[2] / before[2], 2.0) + # + # def testSupportedLanguages(self): + # res = self.client.supported_languages() + # 
self.assertGreater(len(res), 0) + # for l in res: + # for key in ('language', 'version', 'status', 'features'): + # print(key) + # self.assertTrue(hasattr(l, key)) + # self.assertIsNotNone(getattr(l, key)) + + def _validate_filter(self, ctx): + def assert_strnode(n: Node, expected: str) -> None: + self.assertEqual(n.get(), expected) + self.assertIsInstance(n.get_str(), str) + self.assertEqual(n.get_str(), expected) + + # print(ctx) + it = ctx.filter("//uast:RuntimeImport/Path/uast:Alias/Name/uast:Identifier/Name") + self.assertIsInstance(it, NodeIterator) + # wtf = next(it) + # print(type(wtf)) + # print(wtf) + + assert_strnode(next(it), "os") + assert_strnode(next(it), "resource") + assert_strnode(next(it), "unittest") + assert_strnode(next(it), "docker") + assert_strnode(next(it), "bblfsh") + self.assertRaises(StopIteration, next(it)) + + # self.assertEqual(next(results).token, "0") + # self.assertEqual(next(results).token, "1") + # self.assertEqual(next(results).token, "100") + # self.assertEqual(next(results).token, "10") + + +if __name__ == "__main__": + unittest.main() diff --git a/bblfsh/result_context.py b/bblfsh/result_context.py index 2b63e00..f985074 100644 --- a/bblfsh/result_context.py +++ b/bblfsh/result_context.py @@ -17,53 +17,61 @@ class NotNodeIterationException(Exception): pass -ResultMultiType = t.NewType("ResultType", t.Union[dict, int, float, bool, str]) +# ResultMultiType = t.NewType("ResultMultiType", t.Union[dict, int, float, bool, str]) +ResultMultiType = t.Union[dict, int, float, bool, str, None] class Node: def __init__(self, node_ext: NodeExt) -> None: self._node_ext = node_ext - self._loaded_node: t.Optional[ResultMultiType] = None + self._loaded_node: ResultMultiType = None - def _ensure_load(self): + def _ensure_load(self) -> None: if self._loaded_node is None: self._loaded_node = self._node_ext.load() - def __str__(self): + def __str__(self) -> str: return str(self.get()) - def __repr__(self): + def __repr__(self) -> str: return 
repr(self.get()) def get(self) -> ResultMultiType: self._ensure_load() return self._loaded_node - def _get_typed(self, type_: type) -> ResultMultiType: + def _get_typed(self, type_: t.Union[type, t.List[type]]) -> ResultMultiType: self._ensure_load() - if not isinstance(self._loaded_node, type_): + + if not isinstance(type_, list) and not isinstance(type_, tuple): + type_list = [type_] + else: + type_list = type_ + + if type(self._loaded_node) not in type_list: raise ResultTypeException("Expected {} result, but type is '{}'" - .format(type_.__name__, type(self._loaded_node))) + .format(str(type_list), type(self._loaded_node))) + return self._loaded_node def get_bool(self) -> bool: - return self._get_typed(bool) + return t.cast(bool, self._get_typed(bool)) def get_float(self) -> float: - res = self._get_typed(float) + res: ResultMultiType = self._get_typed([float, int]) if isinstance(res, int): res = float(res) - return res + return t.cast(float, res) def get_int(self) -> int: - return self._get_typed(int) + return t.cast(int, self._get_typed(int)) def get_str(self) -> str: - return self._get_typed(str) + return t.cast(str, self._get_typed(str)) def get_dict(self) -> dict: - return self._get_typed(dict) + return t.cast(dict, self._get_typed(dict)) - def iterate(self, order) -> 'NodeIterator': + def iterate(self, order: int) -> 'NodeIterator': if not isinstance(self._node_ext, NodeExt): raise NotNodeIterationException("Cannot iterate over leaf of type '{}'" .format(type(self._node_ext))) @@ -81,7 +89,7 @@ def __iter__(self) -> 'NodeIterator': def __next__(self) -> Node: return Node(next(self._iter_ext)) - def iterate(self, order) -> 'NodeIterator': + def iterate(self, order: int) -> 'NodeIterator': TreeOrder.check_order(order) return NodeIterator(iterator(next(self._iter_ext), order)) @@ -95,7 +103,6 @@ def __init__(self, grpc_response: ParseResponse) -> None: self._response = grpc_response self._ctx = decode(grpc_response.uast, format=0) - self.language = 
grpc_response.language def filter(self, query: str) -> NodeIterator: return NodeIterator(self._ctx.filter(query)) @@ -107,6 +114,14 @@ def iterate(self, order: int) -> NodeIterator: TreeOrder.check_order(order) return NodeIterator(iterator(self._ctx.root(), order)) + @property + def language(self) -> str: + return self._response.language + + @property + def uast(self) -> t.Any: + return self._response.uast + def __str__(self) -> str: return str(self.get_all()) diff --git a/bblfsh/test.py b/bblfsh/test.py index 27e7933..0754663 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -1,310 +1,345 @@ -import os import resource +import typing as t import unittest import docker -from bblfsh import (BblfshClient, filter, iterator, role_id, - role_name, Node, ParseResponse, TreeOrder, filter_bool, - filter_number, filter_string) +from bblfsh import (BblfshClient, iterator, TreeOrder, + ModeDict, role_id, role_name) from bblfsh.launcher import ensure_bblfsh_is_running from bblfsh.client import NonUTF8ContentException +from bblfsh.result_context import (Node, NodeIterator, + ResultContext, ResultTypeException) +from bblfsh.pyuast import uast class BblfshTests(unittest.TestCase): BBLFSH_SERVER_EXISTED = None + fixtures_file = "fixtures/test.py" @classmethod - def setUpClass(cls): + def setUpClass(cls: t.Any) -> None: cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running() @classmethod - def tearDownClass(cls): + def tearDownClass(cls: t.Any) -> None: if not cls.BBLFSH_SERVER_EXISTED: client = docker.from_env(version="auto") client.containers.get("bblfshd").remove(force=True) client.api.close() - def setUp(self): + def setUp(self) -> None: self.client = BblfshClient("0.0.0.0:9432") - def testVersion(self): + def _parse_fixture(self) -> ResultContext: + ctx = self.client.parse(self.fixtures_file) + self._validate_ctx(ctx) + + return ctx + + def testVersion(self) -> None: version = self.client.version() self.assertTrue(hasattr(version, "version")) self.assertTrue(version.version) 
self.assertTrue(hasattr(version, "build")) self.assertTrue(version.build) - def testNativeParse(self): - reply = self.client.native_parse(__file__) - assert(reply.ast) + def testNativeParse(self) -> None: + ctx = self.client.parse(self.fixtures_file, mode=ModeDict["NATIVE"]) + self._validate_ctx(ctx) + self.assertIsNotNone(ctx) - def testNonUTF8ParseError(self): + it = ctx.filter("//*[@ast_type='NoopLine']") + self.assertIsNotNone(it) + self.assertIsInstance(it, NodeIterator) + res = list(it) + self.assertGreater(len(res), 1) + for i in res: + t = i.get_dict().get("ast_type") + self.assertIsNotNone(t) + self.assertEqual(t, "NoopLine") + + def testNonUTF8ParseError(self) -> None: self.assertRaises(NonUTF8ContentException, self.client.parse, "", "Python", b"a = '\x80abc'") - def testUASTDefaultLanguage(self): - res = self.client.parse(__file__) - print(res) - self._validate_resp(self.client.parse(__file__)) + def testUASTDefaultLanguage(self) -> None: + ctx = self._parse_fixture() + self.assertEqual(ctx.language, "python") - def testUASTPython(self): - self._validate_resp(self.client.parse(__file__, language="Python")) + def testUASTWithLanguage(self) -> None: + ctx = self.client.parse(self.fixtures_file, language="Python") + self._validate_ctx(ctx) + self.assertEqual(ctx.language, "python") - def testUASTFileContents(self): - with open(__file__, "rb") as fin: + def testUASTFileContents(self) -> None: + with open(self.fixtures_file, "r") as fin: contents = fin.read() - resp = self.client.parse("file.py", contents=contents) - self._validate_resp(resp) - self._validate_filter(resp) - - def testBrokenFilter(self): - self.assertRaises(RuntimeError, filter, 0, "foo") - - def testFilterInternalType(self): - node = Node() - node.internal_type = 'a' - self.assertTrue(any(filter(node, "//a"))) - self.assertFalse(any(filter(node, "//b"))) - - def testFilterToken(self): - node = Node() - node.token = 'a' - self.assertTrue(any(filter(node, "//*[@token='a']"))) - 
self.assertFalse(any(filter(node, "//*[@token='b']"))) - - def testFilterRoles(self): - node = Node() - node.roles.append(1) - self.assertTrue(any(filter(node, "//*[@roleIdentifier]"))) - self.assertFalse(any(filter(node, "//*[@roleQualified]"))) - - def testFilterProperties(self): - node = Node() - node.properties['k1'] = 'v2' - node.properties['k2'] = 'v1' - self.assertTrue(any(filter(node, "//*[@k2='v1']"))) - self.assertTrue(any(filter(node, "//*[@k1='v2']"))) - self.assertFalse(any(filter(node, "//*[@k1='v1']"))) - - def testFilterStartOffset(self): - node = Node() - node.start_position.offset = 100 - self.assertTrue(any(filter(node, "//*[@startOffset=100]"))) - self.assertFalse(any(filter(node, "//*[@startOffset=10]"))) - - def testFilterStartLine(self): - node = Node() - node.start_position.line = 10 - self.assertTrue(any(filter(node, "//*[@startLine=10]"))) - self.assertFalse(any(filter(node, "//*[@startLine=100]"))) - - def testFilterStartCol(self): - node = Node() - node.start_position.col = 50 - self.assertTrue(any(filter(node, "//*[@startCol=50]"))) - self.assertFalse(any(filter(node, "//*[@startCol=5]"))) - - def testFilterEndOffset(self): - node = Node() - node.end_position.offset = 100 - self.assertTrue(any(filter(node, "//*[@endOffset=100]"))) - self.assertFalse(any(filter(node, "//*[@endOffset=10]"))) - - def testFilterEndLine(self): - node = Node() - node.end_position.line = 10 - self.assertTrue(any(filter(node, "//*[@endLine=10]"))) - self.assertFalse(any(filter(node, "//*[@endLine=100]"))) - - def testFilterEndCol(self): - node = Node() - node.end_position.col = 50 - self.assertTrue(any(filter(node, "//*[@endCol=50]"))) - self.assertFalse(any(filter(node, "//*[@endCol=5]"))) - - def testFilterBool(self): - node = Node() - self.assertTrue(filter_bool(node, "boolean(//*[@startOffset or @endOffset])")) - self.assertFalse(filter_bool(node, "boolean(//*[@blah])")) - - def testFilterNumber(self): - node = Node() - node.children.extend([Node(), Node(), 
Node()]) - self.assertEqual(int(filter_number(node, "count(//*)")), 4) - - def testFilterString(self): - node = Node() - node.internal_type = "test" - self.assertEqual(filter_string(node, "name(//*[1])"), "test") - - def testFilterBadQuery(self): - node = Node() - self.assertRaises(RuntimeError, filter, node, "//*roleModule") - - def testFilterBadType(self): - node = Node() - node.end_position.col = 50 - self.assertRaises(RuntimeError, filter, node, "boolean(//*[@startPosition or @endPosition])") - - def testRoleIdName(self): + + ctx = self.client.parse("file.py", contents=contents) + self._validate_ctx(ctx) + + def assert_strnode(n: Node, expected: str) -> None: + self.assertEqual(n.get(), expected) + self.assertIsInstance(n.get_str(), str) + self.assertEqual(n.get_str(), expected) + + it = ctx.filter("//uast:RuntimeImport/Path/uast:Alias/Name/uast:Identifier/Name") + self.assertIsInstance(it, NodeIterator) + + assert_strnode(next(it), "os") + assert_strnode(next(it), "resource") + assert_strnode(next(it), "unittest") + assert_strnode(next(it), "docker") + assert_strnode(next(it), "bblfsh") + self.assertRaises(StopIteration, next, it) + + def testBrokenFilter(self) -> None: + ctx = self._parse_fixture() + + self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$") + + # FIXME: doesnt work? 
+ # def testFilterToken(self): + # ctx = self._parse_fixture() + # it = ctx.filter("//*[@token='else']/@token") + # # Problem: returns the node containing the @token, not the @token string ("else") + # first = next(it).get_str() + # self.assertEqual(first, "with") + + def testFilterRoles(self) -> None: + ctx = self._parse_fixture() + it = ctx.filter("//*[@role='Identifier']") + self.assertIsInstance(it, NodeIterator) + + l = list(it) + self.assertGreater(len(l), 0) + + it = ctx.filter("//*[@role='Friend']") + self.assertIsInstance(it, NodeIterator) + l = list(it) + self.assertEqual(len(l), 0) + + def testFilterProperties(self) -> None: + ctx = uast() + obj = {"k1":"v1", "k2": "v2"} + self.assertTrue(any(ctx.filter("/*[@k1='v1']", obj))) + self.assertTrue(any(ctx.filter("/*[@k2='v2']", obj))) + self.assertFalse(any(ctx.filter("/*[@k2='v1']", obj))) + self.assertFalse(any(ctx.filter("/*[@k1='v2']", obj))) + + def testFilterStartOffset(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@offset=11749]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@offset=99999]"))) + + def testFilterStartLine(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@line=295]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@line=99999]"))) + + def testFilterStartCol(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@col=42]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@col=99999]"))) + + def testFilterEndOffset(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@offset=11757]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@offset=99999]"))) + + def testFilterEndLine(self) -> None: + ctx = self._parse_fixture() + 
self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@line=321]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@line=99999]"))) + + def testFilterEndCol(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@col=49]"))) + self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@col=99999]"))) + + def testFilterBool(self) -> None: + ctx = self._parse_fixture() + self.assertTrue(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")) + self.assertTrue(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")).get()) + self.assertTrue(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")).get_bool()) + + self.assertFalse(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=9999])")).get()) + self.assertFalse(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=9999])")).get_bool()) + + def testFilterNumber(self) -> None: + ctx = self._parse_fixture() + self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get(), 2) + self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get_int(), 2) + self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get_float(), 2.0) + + def testFilterString(self) -> None: + ctx = self._parse_fixture() + self.assertEqual(next(ctx.filter("name(//uast:Positions)")).get(), "uast:Positions") + self.assertEqual(next(ctx.filter("name(//uast:Positions)")).get_str(), "uast:Positions") + + def testFilterBadQuery(self) -> None: + ctx = uast() + self.assertRaises(RuntimeError, ctx.filter, "//[@roleModule]", {}) + + def testFilterBadType(self) -> None: + ctx = self._parse_fixture() + res = next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")) + self.assertRaises(ResultTypeException, res.get_str) + + def testRoleIdName(self) -> None: 
self.assertEqual(role_id(role_name(1)), 1) self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER") - def _itTestTree(self): - root = Node() - root.internal_type = 'root' - root.start_position.offset = 0 - root.start_position.line = 0 - root.start_position.col = 1 - - son1 = Node() - son1.internal_type = 'son1' - son1.start_position.offset = 1 - - son1_1 = Node() - son1_1.internal_type = 'son1_1' - son1_1.start_position.offset = 10 - - son1_2 = Node() - son1_2.internal_type = 'son1_2' - son1_2.start_position.offset = 10 - - son1.children.extend([son1_1, son1_2]) - - son2 = Node() - son2.internal_type = 'son2' - son2.start_position.offset = 100 - - son2_1 = Node() - son2_1.internal_type = 'son2_1' - son2_1.start_position.offset = 5 - - son2_2 = Node() - son2_2.internal_type = 'son2_2' - son2_2.start_position.offset = 15 - - son2.children.extend([son2_1, son2_2]) - root.children.extend([son1, son2]) + @staticmethod + def _itTestTree() -> dict: + def set_position(node: dict, start_offset: int, start_line: int, start_col: int, + end_offset: int, end_line: int, end_col: int) -> None: + node["@pos"] = { + "@type": "uast:Positions", + "start": { + "@type": "uast:Position", + "offset": start_offset, + "line": start_line, + "col": start_col + }, + "end": { + "@type": "uast:Position", + "offset": end_offset, + "line": end_line, + "col": end_col + } + } + root = {"@type": "root"} + set_position(root, 0,1,1, 1,1,2) + + son1 = {"@type": "son1"} + set_position(son1, 2,2,2, 3,2,3) + + son1_1 = {"@type": "son1_1"} + set_position(son1_1, 10,10,1, 12,2,2) + + son1_2 = {"@type": "son1_2"} + set_position(son1_2, 10,10,1, 12,2,2) + + son1["children"] = [son1_1, son1_2] + + son2 = {"@type": "son2"} + set_position(son2, 100,100,1, 101,100,2) + + son2_1 = {"@type": "son2_1"} + set_position(son2_1, 5,5,1, 6,5,2) + + son2_2 = {"@type": "son2_2"} + set_position(son2_2, 15,15,1, 16,15,2) + + son2["children"] = [son2_1, son2_2] + root["children"] = [son1, son2] return root - def 
testIteratorPreOrder(self): + @staticmethod + def _get_nodetypes(iterator: NodeIterator) -> t.List[str]: + return [n["@type"] for n in + filter(lambda x: isinstance(x, dict), iterator)] + + def testIteratorPreOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.PRE_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['root', 'son1', 'son1_1', 'son1_2', 'son2', 'son2_1', 'son2_2']) - def testIteratorPostOrder(self): + def testIteratorPostOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.POST_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1', 'son2_2', 'son2', 'root']) - def testIteratorLevelOrder(self): + def testIteratorLevelOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.LEVEL_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['root', 'son1', 'son2', 'son1_1', 'son1_2', 'son2_1', 'son2_2']) - def testIteratorPositionOrder(self): + def testIteratorPositionOrder(self) -> None: root = self._itTestTree() it = iterator(root, TreeOrder.POSITION_ORDER) self.assertIsNotNone(it) - expanded = [node.internal_type for node in it] + expanded = self._get_nodetypes(it) self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1', 'son1_2', 'son2_2', 'son2']) - def _validate_resp(self, resp): - self.assertIsNotNone(resp) - self.assertEqual(type(resp).DESCRIPTOR.full_name, - ParseResponse.DESCRIPTOR.full_name) - self.assertEqual(len(resp.errors), 0) - # self.assertIsInstance() does not work - must be some metaclass magic - # self.assertIsInstance(resp.uast, Node) - - # Sometimes its fully qualified, sometimes is just "Node"... 
ditto - self.assertTrue(resp.uast.__class__.__name__.endswith('Node')) - - def testFilterInsideIter(self): - root = self.client.parse(__file__).uast - it = iterator(root, TreeOrder.PRE_ORDER) - self.assertIsNotNone(it) - for n in it: - filter(n, "//*[@roleIdentifier]") - - def testItersMixingIterations(self): - root = self.client.parse(__file__).uast - it = iterator(root, TreeOrder.PRE_ORDER) + def _validate_ctx(self, ctx: ResultContext) -> None: + import bblfsh + self.assertIsNotNone(ctx) + self.assertIsInstance(ctx, bblfsh.result_context.ResultContext) + self.assertIsInstance(ctx.uast, bytes) + + def testFilterInsideIter(self) -> None: + ctx = self._parse_fixture() + c2 = uast() + for n in ctx.iterate(TreeOrder.PRE_ORDER): + c2.filter("//uast:Positions", n) + + def testItersMixingIterations(self) -> None: + ctx = self._parse_fixture() + it = ctx.iterate(TreeOrder.PRE_ORDER) next(it); next(it); next(it) + n = next(it) - it2 = iterator(n, TreeOrder.PRE_ORDER) + it2 = n.iterate(TreeOrder.PRE_ORDER) next(it2) - assert(next(it) == next(it2)) + a = next(it).get() + b = next(it2).get() + self.assertListEqual(a, b) - def testManyFilters(self): - root = self.client.parse(__file__).uast - root.properties['k1'] = 'v2' - root.properties['k2'] = 'v1' + def testManyFilters(self) -> None: + ctx = self._parse_fixture() before = resource.getrusage(resource.RUSAGE_SELF) for _ in range(500): - filter(root, "//*[@roleIdentifier]") + ctx.filter("//*[@role='Identifier']") after = resource.getrusage(resource.RUSAGE_SELF) - # Check that memory usage has not doubled after running the filter + # Check that memory usage has not doubled self.assertLess(after[2] / before[2], 2.0) - def testManyParses(self): + def testManyParses(self) -> None: before = resource.getrusage(resource.RUSAGE_SELF) for _ in range(100): - root = self.client.parse(__file__).uast - root.properties['k1'] = 'v2' - root.properties['k2'] = 'v1' + self._parse_fixture() after = resource.getrusage(resource.RUSAGE_SELF) - # 
Check that memory usage has not doubled after running the parse+filter + # Check that memory usage has not doubled self.assertLess(after[2] / before[2], 2.0) - def testManyParsersAndFilters(self): + def testManyParsersAndFilters(self) -> None: before = resource.getrusage(resource.RUSAGE_SELF) for _ in range(100): - root = self.client.parse(__file__).uast - root.properties['k1'] = 'v2' - root.properties['k2'] = 'v1' - - filter(root, "//*[@roleIdentifier]") + ctx = self.client.parse(self.fixtures_file) + ctx.filter("//*[@role='Identifier']") after = resource.getrusage(resource.RUSAGE_SELF) - # Check that memory usage has not doubled after running the parse+filter + # Check that memory usage has not doubled self.assertLess(after[2] / before[2], 2.0) - def testSupportedLanguages(self): + def testSupportedLanguages(self) -> None: res = self.client.supported_languages() self.assertGreater(len(res), 0) for l in res: for key in ('language', 'version', 'status', 'features'): - print(key) self.assertTrue(hasattr(l, key)) self.assertIsNotNone(getattr(l, key)) - def _validate_filter(self, resp): - results = filter(resp.uast, "//Num") - self.assertIsInstance(resp.uast, Node) - self.assertEqual(next(results).token, "0") - self.assertEqual(next(results).token, "1") - self.assertEqual(next(results).token, "100") - self.assertEqual(next(results).token, "10") - if __name__ == "__main__": unittest.main() diff --git a/setup.py b/setup.py index 26bf45e..0509493 100644 --- a/setup.py +++ b/setup.py @@ -302,7 +302,6 @@ def main(): static_libraries = ["{}/libuast".format(static_lib_dir)] if sys.platform == 'win32': - # FIXME: untested! 
libraries.extend(static_libraries) libraries.extend(["legacy_stdio_definitions", "winmm", "ws2_32"]) extra_objects = [] From 75170a64aebf7778fb7ee871f5d82b3f715dd46a Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 11 Dec 2018 14:43:15 +0100 Subject: [PATCH 36/48] Uncommented failed test Signed-off-by: Juanjo Alvarez --- bblfsh/test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bblfsh/test.py b/bblfsh/test.py index 0754663..895c5b2 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -100,12 +100,12 @@ def testBrokenFilter(self) -> None: self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$") # FIXME: doesnt work? - # def testFilterToken(self): - # ctx = self._parse_fixture() - # it = ctx.filter("//*[@token='else']/@token") - # # Problem: returns the node containing the @token, not the @token string ("else") - # first = next(it).get_str() - # self.assertEqual(first, "with") + def testFilterToken(self): + ctx = self._parse_fixture() + it = ctx.filter("//*[@token='else']/@token") + # Problem: returns the node containing the @token, not the @token string ("else") + first = next(it).get_str() + self.assertEqual(first, "else") def testFilterRoles(self) -> None: ctx = self._parse_fixture() From c4fd5be9ed5843e7f102d19fd6d6ba36cacfe181 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 11 Dec 2018 17:25:04 +0100 Subject: [PATCH 37/48] Enabled unit testing in travis Signed-off-by: Juanjo Alvarez --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f47d82c..a268c4b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,7 @@ install: - pip3 install -r requirements.txt - python3 setup.py --getdeps --log - pip3 install . 
--upgrade + - cd bblfsh && python3 -m unittest discover - if [[ -z "$TRAVIS_TAG" ]]; then exit 0; fi - if [[ $TRAVIS_PYTHON_VERSION != '3.6' ]]; then exit 0; fi # disable double uploads to pypi - echo "[distutils]" > .pypirc @@ -28,6 +29,5 @@ install: - HOME=. python setup.py sdist upload script: - python3 setup.py build_ext -i - - python3 -m unittest discover . notifications: email: false From 753efb43665e282ff115d10a79b3bec1a4ef1ad3 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 11 Dec 2018 17:29:12 +0100 Subject: [PATCH 38/48] Run docker and install python driver from travis Signed-off-by: Juanjo Alvarez --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index a268c4b..09c87c0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,8 @@ language: python sudo: true dist: xenial +env: +- BBLFSHD_VERSION=v2.9.1 BBLFSH_PYTHON_VERSION=v2.3.0 services: - docker cache: @@ -11,6 +13,8 @@ python: - "3.6" - "3.7" install: + - docker run --privileged -d -p 9432:9432 --name bblfshd bblfsh/bblfshd:$BBLFSHD_VERSION + - docker exec bblfshd bblfshctl driver install bblfsh/python-driver:$BBLFSH_PYTHON_VERSION - wget https://github.com/bblfsh/client-python/releases/download/v2.2.1/protobuf-python_3.4.1-1_amd64.deb - sudo dpkg -i protobuf-python_3.4.1-1_amd64.deb - pip3 install --upgrade pip From 9876503376f9cef903bb77a38b8f98441d0076b9 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 12 Dec 2018 09:52:32 +0100 Subject: [PATCH 39/48] Commented out the node affected by SDK issue 340 Signed-off-by: Juanjo Alvarez --- bblfsh/test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bblfsh/test.py b/bblfsh/test.py index 895c5b2..b6559a8 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -99,13 +99,13 @@ def testBrokenFilter(self) -> None: self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$") - # FIXME: doesnt work? 
- def testFilterToken(self): - ctx = self._parse_fixture() - it = ctx.filter("//*[@token='else']/@token") - # Problem: returns the node containing the @token, not the @token string ("else") - first = next(it).get_str() - self.assertEqual(first, "else") + # FIXME: Uncomment once https://github.com/bblfsh/sdk/issues/340 is fixed + # def testFilterToken(self): + # ctx = self._parse_fixture() + # it = ctx.filter("//*[@token='else']/@token") + # # Problem: returns the node containing the @token, not the @token string ("else") + # first = next(it).get_str() + # self.assertEqual(first, "else") def testFilterRoles(self) -> None: ctx = self._parse_fixture() From 7ad8c6f38c66ec1fb2ee4a45c226e7de16b5d52b Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 12 Dec 2018 09:56:12 +0100 Subject: [PATCH 40/48] Remove Python 3.5 from Travis Signed-off-by: Juanjo Alvarez --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 09c87c0..62d970f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ cache: directories: - $HOME/.cache/pip python: - - "3.5" - "3.6" - "3.7" install: From a2752b734189c624a81f8465eebd8f4ab4a263b8 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 12 Dec 2018 15:43:44 +0100 Subject: [PATCH 41/48] Use range for grpcio and grpciotools Signed-off-by: Juanjo Alvarez --- requirements.txt | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3d8bf56..a62b677 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.13.0 -grpcio-tools==1.13.0 +grpcio>=1.13.0 +grpcio-tools>=1.13.0 docker>=2.0,<3.0 protobuf>=3.4.0,<4.0 diff --git a/setup.py b/setup.py index 0509493..6d13481 100644 --- a/setup.py +++ b/setup.py @@ -331,7 +331,7 @@ def main(): packages=find_packages(), exclude=["bblfsh/test.py"], keywords=["babelfish", "uast"], - install_requires=["grpcio==1.13.0", "grpcio-tools==1.13.0", + 
install_requires=["grpcio>=1.13.0", "grpcio-tools>=1.13.0", "docker", "protobuf>=3.4.0"], package_data={"": ["LICENSE", "README.md"]}, ext_modules=[libuast_module], From a99f5be4a3509b113328c0086f5dd01d19954bdc Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Wed, 12 Dec 2018 15:56:09 +0100 Subject: [PATCH 42/48] Fixed some of @bzz feedback from review Signed-off-by: Juanjo Alvarez --- bblfsh/pyuast.cc | 215 ++++++++++++++++++++--------------------------- 1 file changed, 92 insertions(+), 123 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 7226808..9e0acef 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -49,7 +49,7 @@ class ContextExt; typedef struct { PyObject_HEAD - ContextExt *ctx; + ContextExt *ctx; NodeHandle handle; } PyNodeExt; @@ -145,43 +145,43 @@ extern "C" { static PyTypeObject PyUastIterExtType = { PyVarObject_HEAD_INIT(nullptr, 0) - "pyuast.IteratorExt", // tp_name - sizeof(PyUastIterExt), // tp_basicsize - 0, // tp_itemsize - PyUastIterExt_dealloc, // tp_dealloc - 0, // tp_print - 0, // tp_getattr - 0, // tp_setattr - 0, // tp_reserved - 0, // tp_repr - 0, // tp_as_number - 0, // tp_as_sequence - 0, // tp_as_mapping - 0, // tp_hash - 0, // tp_call - 0, // tp_str - 0, // tp_getattro - 0, // tp_setattro - 0, // tp_as_buffer - Py_TPFLAGS_DEFAULT, // tp_flags - "External UastIterator object", // tp_doc - 0, // tp_traverse - 0, // tp_clear - 0, // tp_richcompare - 0, // tp_weaklistoffset - PyUastIterExt_iter, // tp_iter: __iter()__ method - (iternextfunc)PyUastIterExt_next, // tp_iternext: next() method - 0, // tp_methods - 0, // tp_members - 0, // tp_getset - 0, // tp_base - 0, // tp_dict - 0, // tp_descr_get - 0, // tp_descr_set - 0, // tp_dictoffset - 0, // tp_init - PyType_GenericAlloc, // tp_alloc - 0, // tp_new + "pyuast.IteratorExt", // tp_name + sizeof(PyUastIterExt), // tp_basicsize + 0, // tp_itemsize + PyUastIterExt_dealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_reserved + 
0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "External UastIterator object", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + PyUastIterExt_iter, // tp_iter: __iter()__ method + (iternextfunc)PyUastIterExt_next, // tp_iternext: next() method + 0, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + 0, // tp_new }; } @@ -224,8 +224,7 @@ class ContextExt { PyObject* newIter(uast::Iterator *it, bool freeCtx){ PyUastIterExt *pyIt = PyObject_New(PyUastIterExt, &PyUastIterExtType); - if (!pyIt) - return nullptr; + if (!pyIt) return nullptr; if (!PyObject_Init((PyObject *)pyIt, &PyUastIterExtType)) { Py_DECREF(pyIt); @@ -261,9 +260,8 @@ class ContextExt { // Iterate iterates over an external UAST tree. // Borrows the reference. PyObject* Iterate(PyObject* node, TreeOrder order){ - if (!assertNotContext(node)) { - return nullptr; - } + if (!assertNotContext(node)) return nullptr; + NodeHandle h = toHandle(node); auto iter = ctx->Iterate(h, order); return newIter(iter, false); @@ -272,13 +270,10 @@ class ContextExt { // Filter queries an external UAST. // Borrows the reference. PyObject* Filter(PyObject* node, const char* query){ - if (!assertNotContext(node)) { - return nullptr; - } + if (!assertNotContext(node)) return nullptr; + NodeHandle unode = toHandle(node); - if (unode == 0) { - unode = ctx->RootNode(); - } + if (unode == 0) unode = ctx->RootNode(); uast::Iterator *it = ctx->Filter(unode, query); @@ -288,9 +283,8 @@ class ContextExt { // Encode serializes the external UAST. // Borrows the reference. 
PyObject* Encode(PyObject *node, UastFormat format) { - if (!assertNotContext(node)) { - return nullptr; - } + if (!assertNotContext(node)) return nullptr; + uast::Buffer data = ctx->Encode(toHandle(node), format); return asPyBuffer(data); } @@ -307,9 +301,8 @@ static void PyUastIterExt_dealloc(PyObject *self) { auto it = (PyUastIterExt *)self; delete(it->iter); - if (it->freeCtx && it->ctx) { - delete(it->ctx); - } + if (it->freeCtx && it->ctx) delete(it->ctx); + it->freeCtx = false; it->ctx = nullptr; Py_TYPE(self)->tp_free(self); @@ -356,13 +349,12 @@ static PyObject *PythonContextExt_filter(PythonContextExt *self, PyObject *args, return it; } -// PythonContextExt_filter serializes UAST. +// PythonContextExt_encode serializes UAST. // Returns a new reference. static PyObject *PythonContextExt_encode(PythonContextExt *self, PyObject *args) { PyObject *node = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum - if (!PyArg_ParseTuple(args, "Oi", &node, &format)) - return nullptr; + if (!PyArg_ParseTuple(args, "Oi", &node, &format)) return nullptr; return self->p->Encode(node, format); } @@ -386,10 +378,10 @@ extern "C" { static PyTypeObject PythonContextExtType = { PyVarObject_HEAD_INIT(nullptr, 0) - "pyuast.ContextExt", // tp_name - sizeof(PythonContextExt), // tp_basicsize + "pyuast.ContextExt", // tp_name + sizeof(PythonContextExt), // tp_basicsize 0, // tp_itemsize - PythonContextExt_dealloc, // tp_dealloc + PythonContextExt_dealloc, // tp_dealloc 0, // tp_print 0, // tp_getattr 0, // tp_setattr @@ -405,14 +397,14 @@ extern "C" 0, // tp_setattro 0, // tp_as_buffer Py_TPFLAGS_DEFAULT, // tp_flags - "Internal ContextExt object", // tp_doc + "Internal ContextExt object", // tp_doc 0, // tp_traverse 0, // tp_clear 0, // tp_richcompare 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - PythonContextExt_methods, // tp_methods + PythonContextExt_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, 
// tp_base @@ -445,13 +437,10 @@ class Node : public uast::Node { static void checkPyException() { PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); - if (value == nullptr || value == Py_None) { - return; - } - if (type) - Py_DECREF(type); - if (traceback) - Py_DECREF(traceback); + if (value == nullptr || value == Py_None) return; + + if (type) Py_DECREF(type); + if (traceback) Py_DECREF(traceback); PyObject* str = PyObject_Str(value); Py_DECREF(value); @@ -504,12 +493,9 @@ class Node : public uast::Node { Py_DECREF(keys); keys = nullptr; } - if (obj) { - Py_DECREF(obj); - } - if (str) { - delete str; - } + if (obj) Py_DECREF(obj); + if (str) delete str; + } PyObject* toPy(); @@ -543,9 +529,8 @@ class Node : public uast::Node { } size_t Size() { - if (obj == Py_None) { - return 0; - } + if (obj == Py_None) return 0; + size_t sz = 0; if (PyList_Check(obj)) { sz = (size_t)(PyList_Size(obj)); @@ -561,13 +546,11 @@ class Node : public uast::Node { } std::string* KeyAt(size_t i) { - if (obj == Py_None) { - return nullptr; - } + if (obj == Py_None) return nullptr; + if (!keys) keys = PyDict_Keys(obj); - if (!keys) { - return nullptr; - } + if (!keys) return nullptr; + PyObject* key = PyList_GetItem(keys, i); // borrows const char * k = PyUnicode_AsUTF8(key); @@ -575,9 +558,8 @@ class Node : public uast::Node { return s; } Node* ValueAt(size_t i) { - if (obj == Py_None) { - return nullptr; - } + if (obj == Py_None) return nullptr; + if (PyList_Check(obj)) { PyObject* v = PyList_GetItem(obj, i); // borrows return lookupOrCreate(v); // new ref @@ -621,9 +603,8 @@ class Interface : public uast::NodeCreator { std::map obj2node; static PyObject* newBool(bool v) { - if (v) { - Py_RETURN_TRUE; - } + if (v) Py_RETURN_TRUE; + Py_RETURN_FALSE; } @@ -817,8 +798,7 @@ class Context { } PyObject* newIter(uast::Iterator *it, bool freeCtx){ PyUastIter *pyIt = PyObject_New(PyUastIter, &PyUastIterType); - if (!pyIt) - return nullptr; + if (!pyIt) return nullptr; 
if (!PyObject_Init((PyObject *)pyIt, &PyUastIterType)) { Py_DECREF(pyIt); @@ -854,9 +834,8 @@ class Context { // Iterate enumerates UAST nodes in a specified order. // Creates a new reference. PyObject* Iterate(PyObject* node, TreeOrder order, bool freeCtx){ - if (!assertNotContext(node)) { - return nullptr; - } + if (!assertNotContext(node)) return nullptr; + Node* unode = toNode(node); auto iter = ctx->Iterate(unode, order); return newIter(iter, freeCtx); @@ -865,13 +844,10 @@ class Context { // Filter queries UAST. // Creates a new reference. PyObject* Filter(PyObject* node, std::string query){ - if (!assertNotContext(node)) { - return nullptr; - } + if (!assertNotContext(node)) return nullptr; + Node* unode = toNode(node); - if (unode == nullptr) { - unode = ctx->RootNode(); - } + if (unode == nullptr) unode = ctx->RootNode(); auto it = ctx->Filter(unode, query); return newIter(it, false); @@ -879,9 +855,8 @@ class Context { // Encode serializes UAST. // Creates a new reference. PyObject* Encode(PyObject *node, UastFormat format) { - if (!assertNotContext(node)) { - return nullptr; - } + if (!assertNotContext(node)) return nullptr; + uast::Buffer data = ctx->Encode(toNode(node), format); return asPyBuffer(data); } @@ -905,9 +880,8 @@ static void PyUastIter_dealloc(PyObject *self) { auto it = (PyUastIter *)self; delete(it->iter); - if (it->freeCtx && it->ctx) { - delete(it->ctx); - } + if (it->freeCtx && it->ctx) delete(it->ctx); + it->freeCtx = false; it->ctx = nullptr; Py_TYPE(self)->tp_free(self); @@ -946,8 +920,7 @@ static PyObject *PythonContext_filter(PythonContext *self, PyObject *args, PyObj static PyObject *PythonContext_encode(PythonContext *self, PyObject *args) { PyObject *node = nullptr; UastFormat format = UAST_BINARY; // TODO: make it a kwarg and enum - if (!PyArg_ParseTuple(args, "Oi", &node, &format)) - return nullptr; + if (!PyArg_ParseTuple(args, "Oi", &node, &format)) return nullptr; return self->p->Encode(node, format); } @@ -969,9 +942,9 @@ 
extern "C" static PyTypeObject PythonContextType = { PyVarObject_HEAD_INIT(nullptr, 0) "pyuast.Context", // tp_name - sizeof(PythonContext), // tp_basicsize + sizeof(PythonContext), // tp_basicsize 0, // tp_itemsize - PythonContext_dealloc, // tp_dealloc + PythonContext_dealloc, // tp_dealloc 0, // tp_print 0, // tp_getattr 0, // tp_setattr @@ -994,7 +967,7 @@ extern "C" 0, // tp_weaklistoffset 0, // tp_iter: __iter()__ method 0, // tp_iternext: next() method - PythonContext_methods, // tp_methods + PythonContext_methods, // tp_methods 0, // tp_members 0, // tp_getset 0, // tp_base @@ -1016,8 +989,7 @@ static PyObject *PyUastIter_new(PyObject *self, PyObject *args) { PyObject *obj = nullptr; uint8_t order; - if (!PyArg_ParseTuple(args, "OB", &obj, &order)) - return nullptr; + if (!PyArg_ParseTuple(args, "OB", &obj, &order)) return nullptr; // the node can either be external or any other Python object if (PyObject_TypeCheck(obj, &PyNodeExtType)) { @@ -1041,8 +1013,7 @@ static PyObject *PythonContextExt_decode(PyObject *self, PyObject *args, PyObjec Py_buffer buf; int res = PyObject_GetBuffer(obj, &buf, PyBUF_C_CONTIGUOUS); - if (res != 0) - return nullptr; + if (res != 0) return nullptr; uast::Buffer ubuf(buf.buf, (size_t)(buf.len)); @@ -1060,14 +1031,12 @@ static PyObject *PythonContextExt_decode(PyObject *self, PyObject *args, PyObjec static PyObject *PythonContext_new(PyObject *self, PyObject *args) { // TODO: optionally accept root object - if (!PyArg_ParseTuple(args, "")) { - return nullptr; - } + if (!PyArg_ParseTuple(args, "")) return nullptr; + PythonContext *pyU = PyObject_New(PythonContext, &PythonContextType); - if (!pyU) { - return nullptr; - } + if (!pyU) return nullptr; + pyU->p = new Context(); return (PyObject*)pyU; } From b983ea2a6632b519d43f8c3c47ea5e0f389361f2 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Thu, 13 Dec 2018 02:29:40 +0200 Subject: [PATCH 43/48] add error checks for iterators and clarify comments Signed-off-by: Denys Smirnov --- 
bblfsh/pyuast.cc | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/bblfsh/pyuast.cc b/bblfsh/pyuast.cc index 9e0acef..ee0da1a 100644 --- a/bblfsh/pyuast.cc +++ b/bblfsh/pyuast.cc @@ -16,20 +16,10 @@ PyObject* asPyBuffer(uast::Buffer buf) { PyObject* arr = PyByteArray_FromStringAndSize((const char*)(buf.ptr), buf.size); free(buf.ptr); return arr; - //return PyMemoryView_FromMemory((char*)(buf.ptr), buf.size, PyBUF_READ); -} -/* -static bool checkError(const Uast* ctx) { - char *error = LastError((Uast*)ctx); - if (!error) { - return true; - } - PyErr_SetString(PyExc_RuntimeError, error); - free(error); - return false; + // TODO: this is an alternative way of exposing the data; check which one is faster + //return PyMemoryView_FromMemory((char*)(buf.ptr), buf.size, PyBUF_READ); } -*/ bool isContext(PyObject* obj); @@ -129,10 +119,14 @@ static PyObject *PyUastIterExt_toPy(ContextExt *ctx, NodeHandle node); static PyObject *PyUastIterExt_next(PyObject *self) { auto it = (PyUastIterExt *)self; - // TODO: check errors - if (!it->iter->next()) { - PyErr_SetNone(PyExc_StopIteration); - return nullptr; + try { + if (!it->iter->next()) { + PyErr_SetNone(PyExc_StopIteration); + return nullptr; + } + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; } NodeHandle node = it->iter->node(); @@ -719,10 +713,14 @@ static PyObject *PyUastIter_iter(PyObject *self) { static PyObject *PyUastIter_next(PyObject *self) { auto it = (PyUastIter *)self; - // TODO: check errors - if (!it->iter->next()) { - PyErr_SetNone(PyExc_StopIteration); - return nullptr; + try { + if (!it->iter->next()) { + PyErr_SetNone(PyExc_StopIteration); + return nullptr; + } + } catch (const std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; } Node* node = it->iter->node(); From 9e3f41546a61c33315b4240b370b8dbcd447e2b2 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: 
Tue, 18 Dec 2018 10:00:56 +0100 Subject: [PATCH 44/48] Fixed from @zurk review (thanks!) Signed-off-by: Juanjo Alvarez --- bblfsh/aliases.py | 15 ++++++++---- bblfsh/client.py | 2 +- bblfsh/test.py | 56 +++++++++++++++++++++++--------------------- bblfsh/tree_order.py | 5 +++- 4 files changed, 44 insertions(+), 34 deletions(-) diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 61b6aca..9977511 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -3,11 +3,16 @@ # "in" is a reserved keyword in Python thus can't be used as package name, so # we import by string -uast_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2") -protocol_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2") -protocol_grpc_v2_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc") -protocol_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2") -protocol_grpc_v1_module = importlib.import_module("bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc") +uast_v2_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v2.uast.generated_pb2") +protocol_v2_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2") +protocol_grpc_v2_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v2.protocol.generated_pb2_grpc") +protocol_v1_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2") +protocol_grpc_v1_module = importlib.import_module( + "bblfsh.gopkg.in.bblfsh.sdk.v1.protocol.generated_pb2_grpc") DESCRIPTOR = uast_v2_module.DESCRIPTOR ParseRequest = protocol_v2_module.ParseRequest diff --git a/bblfsh/client.py b/bblfsh/client.py index 48b888f..1624660 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -90,7 +90,7 @@ def version(self) -> VersionResponse: Queries the Babelfish server for version and runtime information. 
:return: A dictionary with the keys "version" for the semantic version and - # "build" for the build timestamp. + "build" for the build timestamp. """ return self._stub_v1.Version(VersionRequest()) diff --git a/bblfsh/test.py b/bblfsh/test.py index b6559a8..99732d5 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -100,10 +100,11 @@ def testBrokenFilter(self) -> None: self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$") # FIXME: Uncomment once https://github.com/bblfsh/sdk/issues/340 is fixed - # def testFilterToken(self): - # ctx = self._parse_fixture() - # it = ctx.filter("//*[@token='else']/@token") - # # Problem: returns the node containing the @token, not the @token string ("else") + def testFilterToken(self): + ctx = self._parse_fixture() + it = ctx.filter("//*[@token='else']/@token") + print(next(it)) + # Problem: returns the node containing the @token, not the @token string ("else") # first = next(it).get_str() # self.assertEqual(first, "else") @@ -299,38 +300,39 @@ def testItersMixingIterations(self) -> None: b = next(it2).get() self.assertListEqual(a, b) - def testManyFilters(self) -> None: - ctx = self._parse_fixture() + # XXX uncomment + # def testManyFilters(self) -> None: + # ctx = self._parse_fixture() - before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(500): - ctx.filter("//*[@role='Identifier']") + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(500): + # ctx.filter("//*[@role='Identifier']") - after = resource.getrusage(resource.RUSAGE_SELF) + # after = resource.getrusage(resource.RUSAGE_SELF) - # Check that memory usage has not doubled - self.assertLess(after[2] / before[2], 2.0) + # # Check that memory usage has not doubled + # self.assertLess(after[2] / before[2], 2.0) - def testManyParses(self) -> None: - before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(100): - self._parse_fixture() + # def testManyParses(self) -> None: + # before = resource.getrusage(resource.RUSAGE_SELF) 
+ # for _ in range(100): + # self._parse_fixture() - after = resource.getrusage(resource.RUSAGE_SELF) + # after = resource.getrusage(resource.RUSAGE_SELF) - # Check that memory usage has not doubled - self.assertLess(after[2] / before[2], 2.0) + # # Check that memory usage has not doubled + # self.assertLess(after[2] / before[2], 2.0) - def testManyParsersAndFilters(self) -> None: - before = resource.getrusage(resource.RUSAGE_SELF) - for _ in range(100): - ctx = self.client.parse(self.fixtures_file) - ctx.filter("//*[@role='Identifier']") + # def testManyParsersAndFilters(self) -> None: + # before = resource.getrusage(resource.RUSAGE_SELF) + # for _ in range(100): + # ctx = self.client.parse(self.fixtures_file) + # ctx.filter("//*[@role='Identifier']") - after = resource.getrusage(resource.RUSAGE_SELF) + # after = resource.getrusage(resource.RUSAGE_SELF) - # Check that memory usage has not doubled - self.assertLess(after[2] / before[2], 2.0) + # # Check that memory usage has not doubled + # self.assertLess(after[2] / before[2], 2.0) def testSupportedLanguages(self) -> None: res = self.client.supported_languages() diff --git a/bblfsh/tree_order.py b/bblfsh/tree_order.py index 1d60c5f..e02259b 100644 --- a/bblfsh/tree_order.py +++ b/bblfsh/tree_order.py @@ -1,4 +1,7 @@ -class TreeOrder: +from enum import IntEnum + + +class TreeOrder(IntEnum): _MIN = 0 PRE_ORDER = 0 POST_ORDER = 1 From d485273f457a174b40b820ad71195a739db04197 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 18 Dec 2018 12:15:44 +0100 Subject: [PATCH 45/48] Fixes and improvements from @vmarkovtsev review Signed-off-by: Juanjo Alvarez --- README.md | 4 ++-- bblfsh/client.py | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index d584366..af29b5b 100644 --- a/README.md +++ b/README.md @@ -76,8 +76,8 @@ z = next(ctx.filter("count(//*)").get_int() # or get_float() # default preorder using the `iterate` method on `parse` result or node 
objects: # Directly over parse results -iter = client.parse("/path/to/file.py").iterate(bblfsh.TreeOrder.POST_ORDER) -for i in iter: ... +it = client.parse("/path/to/file.py").iterate(bblfsh.TreeOrder.POST_ORDER) +for i in it: ... # Over filter results (which by default are already iterators with PRE_ORDER): ctx = client.parse("file.py") diff --git a/bblfsh/client.py b/bblfsh/client.py index 1624660..b3ba0f7 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -1,5 +1,5 @@ import os -import typing as t +from typing import Optional, Union, List import grpc @@ -39,7 +39,7 @@ def _ensure_utf8(text: bytes) -> str: raise NonUTF8ContentException("Content must be UTF-8, ASCII or Base64 encoded") @staticmethod - def _get_contents(contents: t.Optional[t.Union[str, bytes]], filename: str) -> str: + def _get_contents(contents: Optional[Union[str, bytes]], filename: str) -> str: if contents is None: with open(filename, "rb") as fin: contents = fin.read() @@ -49,9 +49,9 @@ def _get_contents(contents: t.Optional[t.Union[str, bytes]], filename: str) -> s return contents - def parse(self, filename: str, language: t.Optional[str]=None, - contents: t.Optional[str]=None, mode: t.Optional[ModeType]=None, - timeout: t.Optional[int]=None) -> ResultContext: + def parse(self, filename: str, language: Optional[str]=None, + contents: Optional[str]=None, mode: Optional[ModeType]=None, + timeout: Optional[int]=None) -> ResultContext: """ Queries the Babelfish server and receives the UAST response for the specified file. 
@@ -81,7 +81,7 @@ def parse(self, filename: str, language: t.Optional[str]=None, response = self._stub_v2.Parse(request, timeout=timeout) return ResultContext(response) - def supported_languages(self) -> t.List[str]: + def supported_languages(self) -> List[str]: sup_response = self._stub_v1.SupportedLanguages(SupportedLanguagesRequest()) return sup_response.languages @@ -95,7 +95,7 @@ def version(self) -> VersionResponse: return self._stub_v1.Version(VersionRequest()) @staticmethod - def _scramble_language(lang: t.Optional[str]) -> t.Optional[str]: + def _scramble_language(lang: Optional[str]) -> Optional[str]: if lang is None: return None lang = lang.lower() @@ -103,3 +103,11 @@ def _scramble_language(lang: t.Optional[str]) -> t.Optional[str]: lang = lang.replace("+", "p") lang = lang.replace("#", "sharp") return lang + + def close(self) -> None: + """ + Close the gRPC channel and free the acquired resources. Using a closed client is + not supported. + """ + self._channel.close() + self._channel = self._stub_v1 = self._stub_v2 = None From 66ccfed3a1902f0987b22fc44c6f9e0a1e526d06 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 18 Dec 2018 12:16:34 +0100 Subject: [PATCH 46/48] PEP8 fix Signed-off-by: Juanjo Alvarez --- bblfsh/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bblfsh/client.py b/bblfsh/client.py index b3ba0f7..ac27050 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -18,7 +18,7 @@ class BblfshClient: Babelfish gRPC client. """ - def __init__(self, endpoint:str) -> None: + def __init__(self, endpoint: str) -> None: """ Initializes a new instance of BblfshClient. 
From a0206663c56ff785c636e9e8ef36958cb76dfcd7 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 18 Dec 2018 14:06:14 +0100 Subject: [PATCH 47/48] Changed ModeDict to a Modes enum-like class Signed-off-by: Juanjo Alvarez --- bblfsh/aliases.py | 7 +++++-- bblfsh/test.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/bblfsh/aliases.py b/bblfsh/aliases.py index 9977511..c516b00 100644 --- a/bblfsh/aliases.py +++ b/bblfsh/aliases.py @@ -21,10 +21,13 @@ Mode = protocol_v2_module.Mode ModeType = google.protobuf.internal.enum_type_wrapper.EnumTypeWrapper + +class Modes: + pass + # Current values: {'DEFAULT_MODE': 0, 'NATIVE': 1, 'PREPROCESSED': 2, 'ANNOTATED': 4, 'SEMANTIC': 8} -ModeDict = {} for k, v in Mode.DESCRIPTOR.values_by_name.items(): - ModeDict[k] = v.number + setattr(Modes, k, v.number) DriverStub = protocol_grpc_v2_module.DriverStub DriverServicer = protocol_grpc_v2_module.DriverServicer diff --git a/bblfsh/test.py b/bblfsh/test.py index 99732d5..53afbde 100644 --- a/bblfsh/test.py +++ b/bblfsh/test.py @@ -5,7 +5,7 @@ import docker from bblfsh import (BblfshClient, iterator, TreeOrder, - ModeDict, role_id, role_name) + Modes, role_id, role_name) from bblfsh.launcher import ensure_bblfsh_is_running from bblfsh.client import NonUTF8ContentException from bblfsh.result_context import (Node, NodeIterator, @@ -45,7 +45,7 @@ def testVersion(self) -> None: self.assertTrue(version.build) def testNativeParse(self) -> None: - ctx = self.client.parse(self.fixtures_file, mode=ModeDict["NATIVE"]) + ctx = self.client.parse(self.fixtures_file, mode=Modes.NATIVE) self._validate_ctx(ctx) self.assertIsNotNone(ctx) From 9b094aa37668c787507a2c059d248457cd56c0c4 Mon Sep 17 00:00:00 2001 From: Juanjo Alvarez Date: Tue, 18 Dec 2018 14:09:05 +0100 Subject: [PATCH 48/48] Allow to create Clients with an instanced grpc channel as suggested by Vadim Signed-off-by: Juanjo Alvarez --- bblfsh/client.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) 
diff --git a/bblfsh/client.py b/bblfsh/client.py index ac27050..fc975c3 100644 --- a/bblfsh/client.py +++ b/bblfsh/client.py @@ -18,7 +18,7 @@ class BblfshClient: Babelfish gRPC client. """ - def __init__(self, endpoint: str) -> None: + def __init__(self, endpoint: Union[str, grpc.Channel]) -> None: """ Initializes a new instance of BblfshClient. @@ -27,7 +27,11 @@ def __init__(self, endpoint: str) -> None: :type endpoint: str """ - self._channel = grpc.insecure_channel(endpoint) + if isinstance(endpoint, str): + self._channel = grpc.insecure_channel(endpoint) + else: + self._channel = endpoint + self._stub_v1 = ProtocolServiceStub(self._channel) self._stub_v2 = DriverStub(self._channel)