Skip to content

Commit

Permalink
Make PrefixTree generic to allow for arbitrary values
Browse files Browse the repository at this point in the history
  • Loading branch information
vinistock committed Aug 31, 2023
1 parent 03a287d commit 76a7a41
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 33 deletions.
123 changes: 99 additions & 24 deletions lib/ruby_indexer/lib/ruby_indexer/prefix_tree.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,65 +2,140 @@
# frozen_string_literal: true

module RubyIndexer
# A PrefixTree is a data structure that allows searching for partial strings fast. The tree is similar to a nested
# hash structure, where the keys are the characters of the inserted strings.
#
# ## Example
# ```ruby
# tree = PrefixTree[String].new
# # Insert entries using the same key and value
# tree.insert("bar", "bar")
# tree.insert("baz", "baz")
# # Internally, the structure is analogous to this, but using nodes:
# # {
# # "b" => {
# # "a" => {
# # "r" => "bar",
# # "z" => "baz"
# # }
# # }
# # }
# # When we search it, it finds all possible values based on partial (or complete) matches:
# tree.search("") # => ["bar", "baz"]
# tree.search("b") # => ["bar", "baz"]
# tree.search("ba") # => ["bar", "baz"]
# tree.search("bar") # => ["bar"]
# ```
#
# A PrefixTree is useful for autocomplete, since we always want to find all alternatives while the developer hasn't
# finished typing yet. This PrefixTree implementation allows for string keys and any arbitrary value using the generic
# `Value` type.
#
# See https://en.wikipedia.org/wiki/Trie for more information
class PrefixTree
extend T::Sig
extend T::Generic

Value = type_member

# Creates an empty tree. The root node has an empty key, no parent and is not marked as a leaf, so it never
# contributes a value to search results on its own
sig { void }
def initialize
  @root = T.let(Node.new("", ""), Node[Value])
end

# Search the PrefixTree based on a given `prefix`. If `foo` is an entry in the tree, then searching for `fo` will
# return it as a result. The result is always an array whose elements are of the generic `Value` type. Notice that
# if `Value` is itself an array, this method will return an array of arrays, where each entry is the array of values
# for a given match. An empty array is returned when nothing in the tree starts with `prefix`
sig { params(prefix: String).returns(T::Array[Value]) }
def search(prefix)
  node = find_node(prefix)
  return [] unless node

  node.collect
end

# Inserts a `value` using the given `key`. If `key` is already present, whether as a complete entry or only as an
# intermediate node of a longer key, the value stored on its final node is replaced, so the last insert wins
sig { params(key: String, value: Value).void }
def insert(key, value)
  node = @root

  key.each_char do |char|
    # Nodes created along the way receive `value` only as a placeholder; `collect` ignores it unless the node is
    # later marked as a leaf
    node = node.children[char] ||= Node.new(char, value, node)
  end

  # The final node may have been created earlier by a longer key (e.g. inserting `foo` before `fo`), in which case
  # it still holds that other key's value. `Node` only exposes a reader for `value`, so the instance variable is
  # assigned directly
  node.instance_variable_set(:@value, value)
  node.leaf = true
end

# Deletes the entry identified by `key` from the tree. Notice that a partial match will still delete all entries
# that match it. For example, if the tree contains `foo` and we ask to delete `fo`, then `foo` will be deleted.
# Entries that are a prefix of `key` are preserved: deleting `foo` leaves an existing `fo` entry in place. An empty
# `key` matches every entry and therefore empties the tree. Nothing happens if `key` is not found
sig { params(key: String).void }
def delete(key)
  node = find_node(key)
  return unless node

  parent = T.let(node.parent, T.nilable(Node[Value]))

  # Only the root has no parent, and it is only matched by an empty key. It cannot be detached from anything, so
  # reset it instead
  unless parent
    node.children.clear
    node.leaf = false
    return
  end

  # Remove the node from the tree and then go up the parents to remove any of them that are left without children.
  # A parent that is itself an entry (a leaf) must be kept even when it has no children left
  while parent
    parent.children.delete(node.key)
    return if parent.leaf || parent.children.any?

    node = parent
    parent = parent.parent
  end
end

private

# Find a node that matches the given `key` by walking the tree one character at a time. Returns `nil` as soon as a
# character has no corresponding child. An empty `key` returns the root node
sig { params(key: String).returns(T.nilable(Node[Value])) }
def find_node(key)
  node = @root

  key.each_char do |char|
    snode = node.children[char]
    return nil unless snode

    node = snode
  end

  node
end

class Node
extend T::Sig
extend T::Generic

Value = type_member

sig { returns(T::Hash[String, Node]) }
sig { returns(T::Hash[String, Node[Value]]) }
attr_reader :children

sig { returns(String) }
attr_reader :key

sig { returns(Value) }
attr_reader :value

sig { returns(T::Boolean) }
attr_accessor :leaf

sig { params(value: String).void }
def initialize(value)
@children = T.let({}, T::Hash[String, Node])
@value = T.let(value, String)
sig { returns(T.nilable(Node[Value])) }
attr_reader :parent

sig { params(key: String, value: Value, parent: T.nilable(Node[Value])).void }
def initialize(key, value, parent = nil)
@key = key
@value = value
@parent = parent
@children = T.let({}, T::Hash[String, Node[Value]])
@leaf = T.let(false, T::Boolean)
end

sig { returns(T::Array[String]) }
sig { returns(T::Array[Value]) }
def collect
result = T.let([], T::Array[String])
result = T.let([], T::Array[Value])
result << value if leaf

children.each_value do |node|
Expand Down
36 changes: 30 additions & 6 deletions lib/ruby_indexer/test/prefix_tree_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,24 @@
module RubyIndexer
class PrefixTreeTest < Minitest::Test
# An empty tree yields no results for any prefix, including the empty one
def test_empty
  tree = PrefixTree[String].new

  assert_empty(tree.search(""))
  assert_empty(tree.search("foo"))
end

# A single entry is found by the empty prefix and by its full key, and not by an unrelated prefix
def test_single_item
  tree = PrefixTree[String].new
  tree.insert("foo", "foo")

  assert_equal(["foo"], tree.search(""))
  assert_equal(["foo"], tree.search("foo"))
  assert_empty(tree.search("bar"))
end

def test_multiple_items
tree = PrefixTree.new(["foo", "bar", "baz"])
tree = PrefixTree[String].new
["foo", "bar", "baz"].each { |item| tree.insert(item, item) }

assert_equal(["foo", "bar", "baz"], tree.search(""))
assert_equal(["bar", "baz"], tree.search("b"))
Expand All @@ -32,7 +34,8 @@ def test_multiple_items
end

def test_multiple_prefixes
tree = PrefixTree.new(["fo", "foo"])
tree = PrefixTree[String].new
["fo", "foo"].each { |item| tree.insert(item, item) }

assert_equal(["fo", "foo"], tree.search(""))
assert_equal(["fo", "foo"], tree.search("f"))
Expand All @@ -42,7 +45,8 @@ def test_multiple_prefixes
end

def test_multiple_prefixes_with_shuffled_order
tree = PrefixTree.new([
tree = PrefixTree[String].new
[
"foo/bar/base",
"foo/bar/on",
"foo/bar/support/selection",
Expand Down Expand Up @@ -72,7 +76,7 @@ def test_multiple_prefixes_with_shuffled_order
"foo/bar/support/formatting",
"foo/bar/path",
"foo/executor",
])
].each { |item| tree.insert(item, item) }

assert_equal(
[
Expand All @@ -91,5 +95,25 @@ def test_multiple_prefixes_with_shuffled_order
tree.search("foo/bar/support"),
)
end

# Deleting one entry must not affect a sibling entry that shares the same prefix
def test_deletion
  tree = PrefixTree[String].new
  tree.insert("foo/bar", "foo/bar")
  tree.insert("foo/baz", "foo/baz")
  assert_equal(["foo/bar", "foo/baz"], tree.search("foo"))

  tree.delete("foo/bar")

  assert_empty(tree.search("foo/bar"))
  assert_equal(["foo/baz"], tree.search("foo"))
end

# After the only entry is deleted, no nodes should remain attached to the root
def test_deleted_node_is_removed_from_the_tree
  path = "foo/bar"
  tree = PrefixTree[String].new
  tree.insert(path, path)
  assert_equal([path], tree.search("foo"))

  tree.delete(path)

  # The root is not part of the public interface, so it is read through reflection
  assert_empty(tree.instance_variable_get(:@root).children)
end
end
end
8 changes: 5 additions & 3 deletions lib/ruby_lsp/requests/path_completion.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class PathCompletion < Listener
def initialize(emitter, message_queue)
  super
  @response = T.let([], ResponseType)
  # The tree has to exist before `collect_load_path_files` runs, since that method inserts directly into `@tree`
  @tree = T.let(RubyIndexer::PrefixTree[String].new, RubyIndexer::PrefixTree[String])
  collect_load_path_files

  # Register this listener with the emitter for `:on_tstring_content`
  emitter.register(self, :on_tstring_content)
end
Expand All @@ -44,8 +45,9 @@ def on_tstring_content(node)
def collect_load_path_files
  # Inserts every `.rb` file found under each `$LOAD_PATH` entry into `@tree`. Both the key and the value are the
  # file's path relative to that entry with the `.rb` extension removed
  $LOAD_PATH.flat_map do |p|
    Dir.glob("**/*.rb", base: p)
  end.each do |result|
    # The suffix is stripped exactly once and in place. The return value of `delete_suffix!` is deliberately not
    # used, because it is `nil` whenever the string was left unchanged
    result.delete_suffix!(".rb")
    @tree.insert(result, result)
  end
end

Expand Down

0 comments on commit 76a7a41

Please sign in to comment.