diff --git a/src/libuast.hpp b/src/libuast.hpp
index 3f5037a..48be5ff 100644
--- a/src/libuast.hpp
+++ b/src/libuast.hpp
@@ -58,6 +58,15 @@ namespace uast {
         virtual T node() = 0;
     };
 
+    // NodeHash is a hash of a node subtree.
+    struct NodeHash {
+        // The data are a cryptographic-quality fingerprint of the tree structure.
+        // The specific algorithm is not specified and may change across library versions.
+        // Clients may compare hash values for equality to match equivalent nodes, but cannot
+        // recompute hash values directly, as the input depends on details of serialization.
+        uint8_t data[UAST_HASH_SIZE];
+    };
+
     // Context is a common interface implemented by all UAST contexts.
     template class Context {
     public:
@@ -75,6 +84,8 @@ namespace uast {
 
         virtual Iterator* Filter(T root, std::string query) = 0;
         virtual Iterator* Iterate(T root, TreeOrder order) = 0;
+        // Hash returns the hash of the subtree rooted at root; flags selects hashing options.
+        virtual NodeHash Hash(T root, HashFlags flags) = 0;
     };
 
     // NodeCreator is an interface that creates new UAST nodes.
@@ -351,6 +362,14 @@ namespace uast {
             CheckError();
             return new RawIterator(it);
         }
+        // Hash asks the C library (UastHash) to fill a NodeHash for the subtree at root.
+        // CheckError() runs after the call, matching the neighbouring methods of this class.
+        NodeHash Hash(NodeHandle root, HashFlags flags) {
+            NodeHash h{}; // zero-filled so the result is well-defined even if nothing is written
+            UastHash(ctx, root, h.data, flags);
+            CheckError();
+            return h;
+        }
     };
 
     // PtrIterator is an iterator that casts NodeHandle directly to pointer type T.
@@ -414,6 +433,10 @@ namespace uast {
             auto it = new PtrIterator(raw, true);
             return it;
         }
+        // Hash converts root to a handle and delegates to the wrapped context.
+        NodeHash Hash(T root, HashFlags flags) {
+            return ctx->Hash(ToHandle(root), flags);
+        }
         void CheckError(){
             ctx->CheckError();
         }
diff --git a/src/uast.h b/src/uast.h
index 37578ed..470f95b 100644
--- a/src/uast.h
+++ b/src/uast.h
@@ -90,7 +90,9 @@ typedef struct UastIterator {
 
 typedef enum { UAST_BINARY = 0, UAST_YAML = 1 } UastFormat;
 
+// HashFlags is a bit-field of flags that control node hashing.
 typedef enum {
+  HASH_ALL = 0x0, // no flag bits set
   HASH_NO_POS = 0x1,
 } HashFlags;
 
diff --git a/tests/main.cc b/tests/main.cc
index 71f5528..0ed6c4d 100644
--- a/tests/main.cc
+++ b/tests/main.cc
@@ -28,6 +28,7 @@ int main() {
 
   // add the tests to the suite
   ADD_TEST(suite, "test of RoleNameForId()", TestRoleNameForId);
+  ADD_TEST(suite, "test node hash", TestNodeHash);
   ADD_TEST(suite, "test of UastFilter() pointers", TestUastFilterPointers);
   ADD_TEST(suite, "test iteration (preorder)", TestUastIteratorPreOrder);
   ADD_TEST(suite, "test of UastFilter() counting", TestUastFilterCount);
diff --git a/tests/nodes_test.h b/tests/nodes_test.h
index 82de303..2998f31 100644
--- a/tests/nodes_test.h
+++ b/tests/nodes_test.h
@@ -642,6 +642,39 @@ void TestNodeFindError() {
   UastFree(ctx);
 }
 
+// TestNodeHash checks UastHash output for a small fixed tree against a pinned value.
+void TestNodeHash() {
+  Uast *ctx = NewUastMock();
+  Node* module = newObject("Module");
+  Node* child = newObject("Child");
+  module->SetChild("field", child);
+
+  unsigned char hash[UAST_HASH_SIZE] = {0}; // zeroed so the comparison never reads indeterminate bytes
+  // This value must be updated if the algorithm for structural hashing is changed,
+  // or if the tree structure for the test module changes in a way that modifies the
+  // structural hash. To update it, copy the actual value from the test failure and
+  // update this array.
+  unsigned char exp[UAST_HASH_SIZE] = {
+    0xe6, 0xd2, 0x53, 0xe1, 0x26, 0x0a, 0xaa, 0xa9,
+    0x37, 0xcc, 0xfc, 0x42, 0x0f, 0x52, 0x65, 0x48,
+    0x1d, 0x59, 0x18, 0xce, 0x01, 0xad, 0xda, 0xa2,
+    0x82, 0x4c, 0x74, 0x77, 0xae, 0xa1, 0x26, 0xb5};
+  UastHash(ctx, NodeHandle(module), (void*)hash, HASH_ALL);
+  // Release the context before asserting: CU_ASSERT_FATAL aborts the test function
+  // on failure, so cleanup placed after it would be skipped on that path.
+  UastFree(ctx);
+
+  bool ok = memcmp(hash, exp, UAST_HASH_SIZE) == 0;
+  if (!ok) {
+    printf("unexpected hash value:\n");
+    for (int i = 0; i < UAST_HASH_SIZE; i++) {
+      printf("0x%02x, ", (unsigned char)hash[i]);
+      if (i%8 == 7) printf("\n");
+    }
+  }
+  CU_ASSERT_FATAL(ok);
+}
+
 void TestEmptyResult() {
   Uast *ctx = NewUastMock();
   Node* module = newObject("Module");