From 77de05f2a4f5c8975b61da2ba70e980b3f384507 Mon Sep 17 00:00:00 2001
From: Andy Waite
Date: Wed, 24 Jul 2024 15:27:53 -0400
Subject: [PATCH] Add task to verify indexing against Top 100 RubyGems (#2330)

Add task to verify indexing against Top 100 RubyGems
---
 .github/workflows/indexing.yml |  25 +++++++
 rakelib/index.rake             | 126 +++++++++++++++++++++++++++++++++
 rakelib/top_100_gems.yml       | 101 +++++++++++++++++++++++++++
 sorbet/config                  |   1 +
 4 files changed, 253 insertions(+)
 create mode 100644 .github/workflows/indexing.yml
 create mode 100644 rakelib/index.rake
 create mode 100644 rakelib/top_100_gems.yml

diff --git a/.github/workflows/indexing.yml b/.github/workflows/indexing.yml
new file mode 100644
index 000000000..4fea90c5b
--- /dev/null
+++ b/.github/workflows/indexing.yml
@@ -0,0 +1,25 @@
+name: CI (indexing)
+
+on:
+  push:
+    paths:
+      - 'Gemfile.lock'
+      - 'lib/ruby_indexer/**'
+  pull_request:
+    paths:
+      - 'Gemfile.lock'
+      - 'lib/ruby_indexer/**'
+
+jobs:
+  indexing_sanity_check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          bundler-cache: true
+
+      - name: Index Top 100 Ruby gems
+        run: bundle exec rake index:topgems
diff --git a/rakelib/index.rake b/rakelib/index.rake
new file mode 100644
index 000000000..726ae6e85
--- /dev/null
+++ b/rakelib/index.rake
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+# Based on https://github.com/ruby/prism/blob/main/rakelib/lex.rake
+
+module GemIndexing
+  class << self
+    # Runs the given block once per item, spreading the work over a pool of
+    # threads, and blocks until every item has been processed. This is useful
+    # for IO-bound work such as downloading files or reading files from disk.
+    #
+    # The pool size is read from the WORKERS environment variable (default 16).
+    # Items must be truthy: a nil or false item looks like the end-of-queue
+    # marker and would stop the worker that receives it.
+    #
+    # Returns the array of worker threads, all of which have been joined.
+    def parallelize(items, &block)
+      # An exception in any worker should abort the whole task.
+      Thread.abort_on_exception = true
+
+      queue = Queue.new
+      items.each { |item| queue << item }
+      # Closing the queue makes `shift` return nil once it is drained instead
+      # of blocking forever waiting for more work.
+      queue.close
+
+      workers =
+        ENV.fetch("WORKERS") { 16 }.to_i.times.map do
+          parallelize_thread(queue, &block)
+        end
+
+      workers.map(&:join)
+    end
+
+    private
+
+    # Create a new thread with a minimal number of locals that it can access.
+    # The worker pulls items until the closed queue is exhausted. Checking
+    # `queue.empty?` before `queue.shift` would be racy: another worker can take
+    # the last item in between, leaving this one blocked on `shift`.
+    def parallelize_thread(queue, &block)
+      Thread.new do
+        while (item = queue.shift)
+          block.call(item)
+        end
+      end
+    end
+  end
+end
+
+TOP_100_GEM_FILENAME = "rakelib/top_100_gems.yml"
+TOP_100_GEMS_DIR = "tmp/top_100_gems"
+
+namespace :download do
+  directory TOP_100_GEMS_DIR
+
+  desc "Download the top 100 rubygems under #{TOP_100_GEMS_DIR}/"
+  task topgems: TOP_100_GEMS_DIR do
+    $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
+    require "net/http"
+    require "rubygems/package"
+    require "tmpdir"
+    require "yaml"
+
+    GemIndexing.parallelize(YAML.safe_load_file(TOP_100_GEM_FILENAME)) do |gem_name|
+      directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")
+      # Skip gems that already have an extracted directory.
+      next if File.directory?(directory)
+
+      puts "Downloading #{gem_name}"
+
+      uri = URI.parse("https://rubygems.org/gems/#{gem_name}.gem")
+      response = Net::HTTP.get_response(uri)
+      unless response.is_a?(Net::HTTPSuccess)
+        raise "Failed to download #{gem_name}: #{response.code} #{response.message}"
+      end
+
+      Dir.mktmpdir do |tmpdir|
+        filepath = File.join(tmpdir, "#{gem_name}.gem")
+        # A .gem file is a binary archive, so write it without any newline or
+        # encoding translation.
+        File.binwrite(filepath, response.body)
+        # Only the Ruby sources are needed for indexing.
+        Gem::Package.new(filepath).extract_files(directory, "**/*.rb")
+      end
+    end
+  end
+end
+
+# This task indexes against the top 100 gems, and will exit(1) if any fail.
+desc "Index against the top 100 rubygems"
+task "index:topgems": ["download:topgems"] do
+  $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
+  require "net/http"
+  require "rubygems/package"
+  require "tmpdir"
+  require "yaml"
+
+  gem_names = YAML.safe_load_file(TOP_100_GEM_FILENAME)
+
+  # Workers record failures concurrently, so guard the shared array.
+  errors = []
+  errors_lock = Mutex.new
+
+  GemIndexing.parallelize(gem_names) do |gem_name|
+    directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")
+
+    # One index per gem; every Ruby file of that gem is indexed into it.
+    index = RubyIndexer::Index.new
+
+    Dir[File.join(directory, "**", "*.rb")].each do |filepath|
+      print(".")
+      code = File.read(filepath)
+      index.index_single(RubyIndexer::IndexablePath.new(nil, filepath), code)
+    rescue => e
+      # Record the failure and keep going so a single run reports every file.
+      errors_lock.synchronize { errors << { message: e.message, file: filepath } }
+    end
+  end
+
+  if errors.any?
+    puts "errors: #{errors}"
+    exit(1)
+  end
+ensure
+  FileUtils.rm_rf(TOP_100_GEMS_DIR)
+end
diff --git a/rakelib/top_100_gems.yml b/rakelib/top_100_gems.yml
new file mode 100644
index 000000000..d05ddcb00
--- /dev/null
+++ b/rakelib/top_100_gems.yml
@@ -0,0 +1,101 @@
+---
+- actioncable-7.0.4.3
+- actionmailbox-7.0.4.3
+- actionmailer-7.0.4.3
+- actionpack-7.0.4.3
+- actiontext-7.0.4.3
+- actionview-7.0.4.3
+- activejob-7.0.4.3
+- activemodel-7.0.4.3
+- activerecord-7.0.4.3
+- activestorage-7.0.4.3
+- activesupport-7.0.4.3
+- addressable-2.8.4
+- autoprefixer-rails-10.4.13.0
+- aws-partitions-1.744.0
+- aws-sdk-cloudformation-1.77.0
+- aws-sdk-cloudfront-1.76.0
+- aws-sdk-cloudwatch-1.72.0
+- aws-sdk-core-3.171.0
+- aws-sdk-dynamodb-1.83.0
+- aws-sdk-ec2-1.375.0
+- aws-sdk-iam-1.77.0
+- aws-sdk-kinesis-1.45.0
+- aws-sdk-kms-1.63.0
+- aws-sdk-lambda-1.93.0
+- aws-sdk-rds-1.175.0
+- aws-sdk-resources-3.162.0
+- aws-sdk-s3-1.120.1
+- aws-sdk-secretsmanager-1.73.0
+- aws-sdk-sns-1.60.0
+- aws-sdk-ssm-1.150.0
+- backports-3.24.1
+- brakeman-5.4.1
+- bundler-2.4.11
+- capybara-3.39.0
+- concurrent-ruby-1.2.2
+- connection_pool-2.4.0
+- dalli-3.2.4
+- database_cleaner-2.0.2
+- devise-4.9.2
+- dry-types-1.7.1
+- elasticsearch-8.7.0
+- elasticsearch-api-8.7.0
+- excon-0.99.0
+- faker-3.1.1
+- faraday-retry-2.1.0
+- fastlane-2.212.1
+- fog-aws-3.18.0
+- git-1.18.0
+- google-cloud-errors-1.3.1
+- google-protobuf-3.22.2
+- googleauth-1.5.1
+- graphql-2.0.21
+- grpc-1.53.0
+- jwt-2.7.0
+- loofah-2.20.0
+- mail-2.8.1
+- mime-types-data-3.2023.0218.1
+- minitest-5.18.0
+- msgpack-1.7.0
+- net-http-persistent-4.0.2
+- net-ssh-7.1.0
+- newrelic_rpm-9.1.0
+- nio4r-2.5.9
+- nokogiri-1.14.3
+- octokit-6.1.1
+- oj-3.14.3
+- parser-3.2.2.0
+- pg-1.4.6
+- plist-3.7.0
+- puma-6.2.1
+- rack-3.0.7
+- rack-cors-2.0.1
+- rack-protection-3.0.6
+- rack-test-2.1.0
+- rails-7.0.4.3
+- railties-7.0.4.3
+- raindrops-0.20.1
+- redis-store-1.9.2
+- regexp_parser-2.7.0
+- responders-3.1.0
+- rouge-4.1.0
+- rspec-core-3.12.1
+- rspec-mocks-3.12.5
+- rubocop-1.50.0
+- rubocop-ast-1.28.0
+- rubocop-performance-1.17.1
+- rubocop-rails-2.19.0
+- rubocop-rspec-2.19.0
+- ruby-progressbar-1.13.0
+- ruby_parser-3.20.0
+- rubygems-update-3.4.11
+- selenium-webdriver-4.8.6
+- sidekiq-7.0.8
+- sinatra-3.0.6
+- slop-4.10.1
+- sqlite3-1.6.2
+- thin-1.8.2
+- tilt-2.1.0
+- yard-0.9.32
+- zeitwerk-2.6.7
diff --git a/sorbet/config b/sorbet/config
index 733ffe146..255ce1452 100644
--- a/sorbet/config
+++ b/sorbet/config
@@ -1,6 +1,7 @@
 --dir
 .
 --ignore=vendor/
+--ignore=tmp/
 --ignore=test/fixtures/
 --ignore=test/expectations/
 --enable-experimental-requires-ancestor