From 76a7a4182b330600a49b2eac0e639f55215d93c5 Mon Sep 17 00:00:00 2001 From: Vinicius Stock Date: Tue, 29 Aug 2023 14:44:11 -0400 Subject: [PATCH] Make PrefixTree generic to allow for arbitrary values --- .../lib/ruby_indexer/prefix_tree.rb | 123 ++++++++++++++---- lib/ruby_indexer/test/prefix_tree_test.rb | 36 ++++- lib/ruby_lsp/requests/path_completion.rb | 8 +- 3 files changed, 134 insertions(+), 33 deletions(-) diff --git a/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb b/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb index 9de6485692..4917f5f2ed 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb @@ -2,65 +2,140 @@ # frozen_string_literal: true module RubyIndexer + # A PrefixTree is a data structure that allows searching for partial strings fast. The tree is similar to a nested + # hash structure, where the keys are the characters of the inserted strings. + # + # ## Example + # ```ruby + # tree = PrefixTree[String].new + # # Insert entries using the same key and value + # tree.insert("bar", "bar") + # tree.insert("baz", "baz") + # # Internally, the structure is analogous to this, but using nodes: + # # { + # # "b" => { + # # "a" => { + # # "r" => "bar", + # # "z" => "baz" + # # } + # # } + # # } + # # When we search it, it finds all possible values based on partial (or complete) matches: + # tree.search("") # => ["bar", "baz"] + # tree.search("b") # => ["bar", "baz"] + # tree.search("ba") # => ["bar", "baz"] + # tree.search("bar") # => ["bar"] + # ``` + # + # A PrefixTree is useful for autocomplete, since we always want to find all alternatives while the developer hasn't + # finished typing yet. This PrefixTree implementation allows for string keys and any arbitrary value using the generic + # `Value` type. 
+ # + # See https://en.wikipedia.org/wiki/Trie for more information class PrefixTree extend T::Sig + extend T::Generic - sig { params(items: T::Array[String]).void } - def initialize(items) - @root = T.let(Node.new(""), Node) + Value = type_member - items.each { |item| insert(item) } + sig { void } + def initialize + @root = T.let(Node.new("", ""), Node[Value]) end - sig { params(prefix: String).returns(T::Array[String]) } + # Search the PrefixTree based on a given `prefix`. If `foo` is an entry in the tree, then searching for `fo` will + # return it as a result. The result is always an array of the type of value attributed to the generic `Value` type. + # Notice that if the `Value` is an array, this method will return an array of arrays, where each entry is the array + # of values for a given match + sig { params(prefix: String).returns(T::Array[Value]) } def search(prefix) - node = T.let(@root, Node) + node = find_node(prefix) + return [] unless node - prefix.each_char do |char| - snode = node.children[char] - return [] unless snode + node.collect + end - node = snode + # Inserts a `value` using the given `key` + sig { params(key: String, value: Value).void } + def insert(key, value) + node = @root + + key.each_char do |char| + node = node.children[char] ||= Node.new(char, value, node) end - node.collect + node.leaf = true + end + + # Deletes the entry identified by `key` from the tree. Notice that a partial match will still delete all entries + # that match it. For example, if the tree contains `foo` and we ask to delete `fo`, then `foo` will be deleted + sig { params(key: String).void } + def delete(key) + node = find_node(key) + return unless node + + # Remove the node from the tree and then go up the parents to remove any of them with empty children + parent = T.let(T.must(node.parent), T.nilable(Node[Value])) + + while parent + parent.children.delete(node.key) + return if parent.children.any? 
+ + node = parent + parent = parent.parent + end end private - sig { params(item: String).void } - def insert(item) - node = T.let(@root, Node) + # Find a node that matches the given `key` + sig { params(key: String).returns(T.nilable(Node[Value])) } + def find_node(key) + node = @root + + key.each_char do |char| + snode = node.children[char] + return nil unless snode - item.each_char do |char| - node = node.children[char] ||= Node.new(node.value + char) + node = snode end - node.leaf = true + node end class Node extend T::Sig + extend T::Generic + + Value = type_member - sig { returns(T::Hash[String, Node]) } + sig { returns(T::Hash[String, Node[Value]]) } attr_reader :children sig { returns(String) } + attr_reader :key + + sig { returns(Value) } attr_reader :value sig { returns(T::Boolean) } attr_accessor :leaf - sig { params(value: String).void } - def initialize(value) - @children = T.let({}, T::Hash[String, Node]) - @value = T.let(value, String) + sig { returns(T.nilable(Node[Value])) } + attr_reader :parent + + sig { params(key: String, value: Value, parent: T.nilable(Node[Value])).void } + def initialize(key, value, parent = nil) + @key = key + @value = value + @parent = parent + @children = T.let({}, T::Hash[String, Node[Value]]) @leaf = T.let(false, T::Boolean) end - sig { returns(T::Array[String]) } + sig { returns(T::Array[Value]) } def collect - result = T.let([], T::Array[String]) + result = T.let([], T::Array[Value]) result << value if leaf children.each_value do |node| diff --git a/lib/ruby_indexer/test/prefix_tree_test.rb b/lib/ruby_indexer/test/prefix_tree_test.rb index 3a9faab651..34111986f4 100644 --- a/lib/ruby_indexer/test/prefix_tree_test.rb +++ b/lib/ruby_indexer/test/prefix_tree_test.rb @@ -6,14 +6,15 @@ module RubyIndexer class PrefixTreeTest < Minitest::Test def test_empty - tree = PrefixTree.new([]) + tree = PrefixTree.new assert_empty(tree.search("")) assert_empty(tree.search("foo")) end def test_single_item - tree = 
PrefixTree.new(["foo"]) + tree = PrefixTree.new + tree.insert("foo", "foo") assert_equal(["foo"], tree.search("")) assert_equal(["foo"], tree.search("foo")) @@ -21,7 +22,8 @@ def test_single_item end def test_multiple_items - tree = PrefixTree.new(["foo", "bar", "baz"]) + tree = PrefixTree[String].new + ["foo", "bar", "baz"].each { |item| tree.insert(item, item) } assert_equal(["foo", "bar", "baz"], tree.search("")) assert_equal(["bar", "baz"], tree.search("b")) @@ -32,7 +34,8 @@ def test_multiple_items end def test_multiple_prefixes - tree = PrefixTree.new(["fo", "foo"]) + tree = PrefixTree[String].new + ["fo", "foo"].each { |item| tree.insert(item, item) } assert_equal(["fo", "foo"], tree.search("")) assert_equal(["fo", "foo"], tree.search("f")) @@ -42,7 +45,8 @@ def test_multiple_prefixes end def test_multiple_prefixes_with_shuffled_order - tree = PrefixTree.new([ + tree = PrefixTree[String].new + [ "foo/bar/base", "foo/bar/on", "foo/bar/support/selection", @@ -72,7 +76,7 @@ def test_multiple_prefixes_with_shuffled_order "foo/bar/support/formatting", "foo/bar/path", "foo/executor", - ]) + ].each { |item| tree.insert(item, item) } assert_equal( [ @@ -91,5 +95,25 @@ def test_multiple_prefixes_with_shuffled_order tree.search("foo/bar/support"), ) end + + def test_deletion + tree = PrefixTree[String].new + ["foo/bar", "foo/baz"].each { |item| tree.insert(item, item) } + assert_equal(["foo/bar", "foo/baz"], tree.search("foo")) + + tree.delete("foo/bar") + assert_empty(tree.search("foo/bar")) + assert_equal(["foo/baz"], tree.search("foo")) + end + + def test_deleted_node_is_removed_from_the_tree + tree = PrefixTree[String].new + tree.insert("foo/bar", "foo/bar") + assert_equal(["foo/bar"], tree.search("foo")) + + tree.delete("foo/bar") + root = tree.instance_variable_get(:@root) + assert_empty(root.children) + end end end diff --git a/lib/ruby_lsp/requests/path_completion.rb b/lib/ruby_lsp/requests/path_completion.rb index c7d56466ba..77096d40cf 100644 --- 
a/lib/ruby_lsp/requests/path_completion.rb +++ b/lib/ruby_lsp/requests/path_completion.rb @@ -26,7 +26,8 @@ class PathCompletion < Listener def initialize(emitter, message_queue) super @response = T.let([], ResponseType) - @tree = T.let(RubyIndexer::PrefixTree.new(collect_load_path_files), RubyIndexer::PrefixTree) + @tree = T.let(RubyIndexer::PrefixTree[String].new, RubyIndexer::PrefixTree[String]) + collect_load_path_files emitter.register(self, :on_tstring_content) end @@ -44,8 +45,9 @@ def on_tstring_content(node) def collect_load_path_files $LOAD_PATH.flat_map do |p| Dir.glob("**/*.rb", base: p) - end.map! do |result| - result.delete_suffix!(".rb") + end.each do |result| + entry = result.delete_suffix!(".rb") + @tree.insert(entry, entry) end end