diff --git a/lib/ruby_indexer/lib/ruby_indexer/declaration_listener.rb b/lib/ruby_indexer/lib/ruby_indexer/declaration_listener.rb index ab7cbed46..fa503cdde 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/declaration_listener.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/declaration_listener.rb @@ -68,8 +68,10 @@ def on_class_node_enter(node) superclass.slice end + nesting = name.start_with?("::") ? [name.delete_prefix("::")] : @stack + [name.delete_prefix("::")] + entry = Entry::Class.new( - fully_qualify_name(name), + nesting, @file_path, node.location, comments, @@ -94,7 +96,9 @@ def on_module_node_enter(node) name = node.constant_path.location.slice comments = collect_comments(node) - entry = Entry::Module.new(fully_qualify_name(name), @file_path, node.location, comments) + + nesting = name.start_with?("::") ? [name.delete_prefix("::")] : @stack + [name.delete_prefix("::")] + entry = Entry::Module.new(nesting, @file_path, node.location, comments) @owner_stack << entry @index << entry @@ -205,10 +209,8 @@ def on_call_node_enter(node) handle_attribute(node, reader: false, writer: true) when :attr_accessor handle_attribute(node, reader: true, writer: true) - when :include - handle_module_operation(node, :included_modules) - when :prepend - handle_module_operation(node, :prepended_modules) + when :include, :prepend, :extend + handle_module_operation(node, message) when :public @visibility_stack.push(Entry::Visibility::PUBLIC) when :protected @@ -479,16 +481,21 @@ def handle_module_operation(node, operation) arguments = node.arguments&.arguments return unless arguments - names = arguments.filter_map do |node| - if node.is_a?(Prism::ConstantReadNode) || node.is_a?(Prism::ConstantPathNode) - node.full_name + arguments.each do |node| + next unless node.is_a?(Prism::ConstantReadNode) || node.is_a?(Prism::ConstantPathNode) + + case operation + when :include + owner.mixin_operations << Entry::Include.new(node.full_name) + when :prepend + owner.mixin_operations << Entry::Prepend.new(node.full_name) + when :extend + owner.mixin_operations << Entry::Extend.new(node.full_name) end rescue Prism::ConstantPathNode::DynamicPartsInConstantPathError, Prism::ConstantPathNode::MissingNodesInConstantPathError # Do nothing end - collection = operation == :included_modules ? owner.included_modules : owner.prepended_modules - collection.concat(names) end sig { returns(Entry::Visibility) } diff --git a/lib/ruby_indexer/lib/ruby_indexer/entry.rb b/lib/ruby_indexer/lib/ruby_indexer/entry.rb index 8fac069ed..5012cfbf7 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/entry.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/entry.rb @@ -62,6 +62,25 @@ def file_name File.basename(@file_path) end + class ModuleOperation + extend T::Sig + extend T::Helpers + + abstract! + + sig { returns(String) } + attr_reader :module_name + + sig { params(module_name: String).void } + def initialize(module_name) + @module_name = module_name + end + end + + class Include < ModuleOperation; end + class Prepend < ModuleOperation; end + class Extend < ModuleOperation; end + class Namespace < Entry extend T::Sig extend T::Helpers @@ -69,13 +88,35 @@ class Namespace < Entry abstract! sig { returns(T::Array[String]) } - def included_modules - @included_modules ||= T.let([], T.nilable(T::Array[String])) + attr_reader :nesting + + sig do + params( + nesting: T::Array[String], + file_path: String, + location: T.any(Prism::Location, RubyIndexer::Location), + comments: T::Array[String], + ).void + end + def initialize(nesting, file_path, location, comments) + @name = T.let(nesting.join("::"), String) + # The original nesting where this namespace was discovered + @nesting = nesting + + super(@name, file_path, location, comments) end sig { returns(T::Array[String]) } - def prepended_modules - @prepended_modules ||= T.let([], T.nilable(T::Array[String])) + def mixin_operation_module_names + mixin_operations.map(&:module_name) + end + + # Stores all explicit prepend, include and extend operations in the exact order they were discovered in the source + # code. Maintaining the order is essential to linearize ancestors the right way when a module is both included + # and prepended + sig { returns(T::Array[ModuleOperation]) } + def mixin_operations + @mixin_operations ||= T.let([], T.nilable(T::Array[ModuleOperation])) end end @@ -92,15 +133,16 @@ class Class < Namespace sig do params( - name: String, + nesting: T::Array[String], file_path: String, location: T.any(Prism::Location, RubyIndexer::Location), comments: T::Array[String], parent_class: T.nilable(String), ).void end - def initialize(name, file_path, location, comments, parent_class) - super(name, file_path, location, comments) + def initialize(nesting, file_path, location, comments, parent_class) + super(nesting, file_path, location, comments) + @parent_class = T.let(parent_class, T.nilable(String)) end end diff --git a/lib/ruby_indexer/lib/ruby_indexer/index.rb b/lib/ruby_indexer/lib/ruby_indexer/index.rb index eaf23cb94..13db72fdf 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/index.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/index.rb @@ -6,6 +6,7 @@ class Index extend T::Sig class UnresolvableAliasError < StandardError; end + class NonExistingNamespaceError < StandardError; end # The minimum Jaro-Winkler similarity score for an entry to be considered a match for a given fuzzy search query ENTRY_SIMILARITY_THRESHOLD = 0.7 @@ -31,6 +32,9 @@ def initialize # Holds all require paths for every indexed item so that we can provide autocomplete for requires @require_paths_tree = T.let(PrefixTree[IndexablePath].new, PrefixTree[IndexablePath]) + + # Holds the linearized ancestors list for every namespace + @ancestors = T.let({}, T::Hash[String, T::Array[String]]) end sig { params(indexable: IndexablePath).void } @@ -255,6 +259,104 @@ def resolve_method(method_name, receiver_name) end end + # Linearizes the ancestors for a given name, returning the order of namespaces in which Ruby will search for method + # or constant declarations. + # + # When we add an ancestor in Ruby, that namespace might have ancestors of its own. Therefore, we need to linearize + # everything recursively to ensure that we are placing ancestors in the right order. For example, if you include a + # module that prepends another module, then the prepend module appears before the included module. + # + # The order of ancestors is [linearized_prepends, self, linearized_includes, linearized_superclass] + sig { params(fully_qualified_name: String).returns(T::Array[String]) } + def linearized_ancestors_of(fully_qualified_name) + # If we already computed the ancestors for this namespace, return it straight away + cached_ancestors = @ancestors[fully_qualified_name] + return cached_ancestors if cached_ancestors + + ancestors = [fully_qualified_name] + + # Cache the linearized ancestors array eagerly. This is important because we might have circular dependencies and + # this will prevent us from falling into an infinite recursion loop. Because we mutate the ancestors array later, + # the cache will reflect the final result + @ancestors[fully_qualified_name] = ancestors + + # If we don't have an entry for `name`, raise + entries = resolve(fully_qualified_name, []) + raise NonExistingNamespaceError, "No entry found for #{fully_qualified_name}" unless entries + + # If none of the entries for `name` are namespaces, raise + namespaces = entries.filter_map do |entry| + case entry + when Entry::Namespace + entry + when Entry::Alias + self[entry.target]&.grep(Entry::Namespace) + end + end.flatten + + raise NonExistingNamespaceError, + "None of the entries for #{fully_qualified_name} are modules or classes" if namespaces.empty? + + mixin_operations = namespaces.flat_map(&:mixin_operations) + main_namespace_index = 0 + + # The original nesting where we discovered this namespace, so that we resolve the correct names of the + # included/prepended/extended modules and parent classes + nesting = T.must(namespaces.first).nesting + + mixin_operations.each do |operation| + resolved_module = resolve(operation.module_name, nesting) + next unless resolved_module + + module_fully_qualified_name = T.must(resolved_module.first).name + + case operation + when Entry::Prepend + # When a module is prepended, Ruby checks if it hasn't been prepended already to prevent adding it in front of + # the actual namespace twice. However, it does not check if it has been included because you are allowed to + # prepend the same module after it has already been included + linearized_prepends = linearized_ancestors_of(module_fully_qualified_name) + + # When there are duplicate prepended modules, we have to insert the new prepends after the existing ones. For + # example, if the current ancestors are `["A", "Foo"]` and we try to prepend `["A", "B"]`, then `"B"` has to + # be inserted after `"A` + uniq_prepends = linearized_prepends - T.must(ancestors[0...main_namespace_index]) + insert_position = linearized_prepends.length - uniq_prepends.length + + T.unsafe(ancestors).insert( + insert_position, + *(linearized_prepends - T.must(ancestors[0...main_namespace_index])), + ) + + main_namespace_index += linearized_prepends.length + when Entry::Include + # When including a module, Ruby will always prevent duplicate entries in case the module has already been + # prepended or included + linearized_includes = linearized_ancestors_of(module_fully_qualified_name) + T.unsafe(ancestors).insert(main_namespace_index + 1, *(linearized_includes - ancestors)) + end + end + + # Find the first class entry that has a parent class. Notice that if the developer makes a mistake and inherits + # from two diffent classes in different files, we simply ignore it + superclass = T.cast(namespaces.find { |n| n.is_a?(Entry::Class) && n.parent_class }, T.nilable(Entry::Class)) + + if superclass + # If the user makes a mistake and creates a class that inherits from itself, this method would throw a stack + # error. We need to ensure that this isn't the case + parent_class = T.must(superclass.parent_class) + + resolved_parent_class = resolve(parent_class, nesting) + parent_class_name = resolved_parent_class&.first&.name + + if parent_class_name && fully_qualified_name != parent_class_name + ancestors.concat(linearized_ancestors_of(parent_class_name)) + end + end + + ancestors + end + private # Attempts to resolve an UnresolvedAlias into a resolved Alias. If the unresolved alias is pointing to a constant diff --git a/lib/ruby_indexer/test/classes_and_modules_test.rb b/lib/ruby_indexer/test/classes_and_modules_test.rb index d76c25e17..5e6f7f17c 100644 --- a/lib/ruby_indexer/test/classes_and_modules_test.rb +++ b/lib/ruby_indexer/test/classes_and_modules_test.rb @@ -369,13 +369,13 @@ class ConstantPathReferences RUBY foo = T.must(@index["Foo"][0]) - assert_equal(["A1", "A2", "A3", "A4", "A5", "A6"], foo.included_modules) + assert_equal(["A1", "A2", "A3", "A4", "A5", "A6"], foo.mixin_operation_module_names) qux = T.must(@index["Foo::Qux"][0]) - assert_equal(["Corge", "Corge", "Baz"], qux.included_modules) + assert_equal(["Corge", "Corge", "Baz"], qux.mixin_operation_module_names) constant_path_references = T.must(@index["ConstantPathReferences"][0]) - assert_equal(["Foo::Bar", "Foo::Bar2"], constant_path_references.included_modules) + assert_equal(["Foo::Bar", "Foo::Bar2"], constant_path_references.mixin_operation_module_names) end def test_keeping_track_of_prepended_modules @@ -415,13 +415,59 @@ class ConstantPathReferences RUBY foo = T.must(@index["Foo"][0]) - assert_equal(["A1", "A2", "A3", "A4", "A5", "A6"], foo.prepended_modules) + assert_equal(["A1", "A2", "A3", "A4", "A5", "A6"], foo.mixin_operation_module_names) qux = T.must(@index["Foo::Qux"][0]) - assert_equal(["Corge", "Corge", "Baz"], qux.prepended_modules) + assert_equal(["Corge", "Corge", "Baz"], qux.mixin_operation_module_names) constant_path_references = T.must(@index["ConstantPathReferences"][0]) - assert_equal(["Foo::Bar", "Foo::Bar2"], constant_path_references.prepended_modules) + assert_equal(["Foo::Bar", "Foo::Bar2"], constant_path_references.mixin_operation_module_names) + end + + def test_keeping_track_of_extended_modules + index(<<~RUBY) + class Foo + # valid syntaxes that we can index + extend A1 + self.extend A2 + extend A3, A4 + self.extend A5, A6 + + # valid syntaxes that we cannot index because of their dynamic nature + extend some_variable_or_method_call + self.extend some_variable_or_method_call + + def something + extend A7 # We should not index this because of this dynamic nature + end + + # Valid inner class syntax definition with its own modules prepended + class Qux + extend Corge + self.extend Corge + extend Baz + + extend some_variable_or_method_call + end + end + + class ConstantPathReferences + extend Foo::Bar + self.extend Foo::Bar2 + + extend dynamic::Bar + extend Foo:: + end + RUBY + + foo = T.must(@index["Foo"][0]) + assert_equal(["A1", "A2", "A3", "A4", "A5", "A6"], foo.mixin_operation_module_names) + + qux = T.must(@index["Foo::Qux"][0]) + assert_equal(["Corge", "Corge", "Baz"], qux.mixin_operation_module_names) + + constant_path_references = T.must(@index["ConstantPathReferences"][0]) + assert_equal(["Foo::Bar", "Foo::Bar2"], constant_path_references.mixin_operation_module_names) end end end diff --git a/lib/ruby_indexer/test/index_test.rb b/lib/ruby_indexer/test/index_test.rb index 6f759471a..d5ebfdd2e 100644 --- a/lib/ruby_indexer/test/index_test.rb +++ b/lib/ruby_indexer/test/index_test.rb @@ -313,5 +313,376 @@ def test_index_single_does_not_fail_for_non_existing_file @index.index_single(IndexablePath.new(nil, "/fake/path/foo.rb")) assert_empty(@index.instance_variable_get(:@entries)) end + + def test_linearized_ancestors_basic_ordering + index(<<~RUBY) + module A; end + module B; end + + class Foo + prepend A + prepend B + end + + class Bar + include A + include B + end + RUBY + + assert_equal( + [ + "B", + "A", + "Foo", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo"), + ) + + assert_equal( + [ + "Bar", + "B", + "A", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Bar"), + ) + end + + def test_linearized_ancestors + index(<<~RUBY) + module A; end + module B; end + module C; end + + module D + include A + end + + module E + prepend B + end + + module F + include C + include A + end + + class Bar + prepend F + end + + class Foo < Bar + include E + prepend D + end + RUBY + + # Object, Kernel and BasicObject are intentionally commented out for now until we develop a strategy for indexing + # declarations made in C code + assert_equal( + [ + "D", + "A", + "Foo", + "B", + "E", + "F", + "A", + "C", + "Bar", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo"), + ) + end + + def test_linearized_ancestors_duplicates + index(<<~RUBY) + module A; end + module B + include A + end + + class Foo + include B + include A + end + + class Bar + prepend B + prepend A + end + RUBY + + assert_equal( + [ + "Foo", + "B", + "A", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo"), + ) + + assert_equal( + [ + "B", + "A", + "Bar", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Bar"), + ) + end + + def test_linearizing_ancestors_is_cached + index(<<~RUBY) + module C; end + module A; end + module B + include A + end + + class Foo + include B + include A + end + RUBY + + @index.linearized_ancestors_of("Foo") + ancestors = @index.instance_variable_get(:@ancestors) + assert(ancestors.key?("Foo")) + assert(ancestors.key?("A")) + assert(ancestors.key?("B")) + refute(ancestors.key?("C")) + end + + def test_duplicate_prepend_include + index(<<~RUBY) + module A; end + + class Foo + prepend A + include A + end + + class Bar + include A + prepend A + end + RUBY + + assert_equal( + [ + "A", + "Foo", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo"), + ) + + assert_equal( + [ + "A", + "Bar", + "A", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Bar"), + ) + end + + def test_linearizing_ancestors_handles_circular_parent_class + index(<<~RUBY) + class Foo < Foo + end + RUBY + + assert_equal( + [ + "Foo", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo"), + ) + end + + def test_ancestors_linearization_complex_prepend_duplication + index(<<~RUBY) + module A; end + module B + prepend A + end + module C + prepend B + end + + class Foo + prepend A + prepend C + end + RUBY + + assert_equal( + [ + "A", + "B", + "C", + "Foo", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo"), + ) + end + + def test_ancestors_linearization_complex_include_duplication + index(<<~RUBY) + module A; end + module B + include A + end + module C + include B + end + + class Foo + include A + include C + end + RUBY + + assert_equal( + [ + "Foo", + "C", + "B", + "A", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo"), + ) + end + + def test_linearizing_ancestors_that_need_to_be_resolved + index(<<~RUBY) + module Foo + module Baz + end + module Qux + end + + class Something; end + + class Bar < Something + include Baz + prepend Qux + end + end + RUBY + + assert_equal( + [ + "Foo::Qux", + "Foo::Bar", + "Foo::Baz", + "Foo::Something", + # "Object", + # "Kernel", + # "BasicObject", + ], + @index.linearized_ancestors_of("Foo::Bar"), + ) + end + + def test_linearizing_ancestors_for_non_existing_namespaces + index(<<~RUBY) + module Kernel + def Array(a); end + end + RUBY + + assert_raises(Index::NonExistingNamespaceError) do + @index.linearized_ancestors_of("Foo") + end + + assert_raises(Index::NonExistingNamespaceError) do + @index.linearized_ancestors_of("Array") + end + end + + def test_linearizing_circular_ancestors + index(<<~RUBY) + module M1 + include M2 + end + + module M2 + include M1 + end + + module A1 + include A2 + end + + module A2 + include A3 + end + + module A3 + include A1 + end + + class Foo < Foo + include Foo + end + + module Bar + include Bar + end + RUBY + + assert_equal(["M2", "M1"], @index.linearized_ancestors_of("M2")) + assert_equal(["A3", "A1", "A2"], @index.linearized_ancestors_of("A3")) + assert_equal(["Foo"], @index.linearized_ancestors_of("Foo")) + assert_equal(["Bar"], @index.linearized_ancestors_of("Bar")) + end + + def test_linearizing_circular_aliased_dependency + index(<<~RUBY) + module A + end + + ALIAS = A + + module A + include ALIAS + end + RUBY + + assert_equal(["A", "ALIAS"], @index.linearized_ancestors_of("A")) + end end end