Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add task to verify indexing against Top 100 RubyGems #2330

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/indexing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Workflow whose single job runs the `index:topgems` rake task.
name: CI (indexing)

# Events that trigger this workflow; each event below carries a `paths` filter.
on:
push:
andyw8 marked this conversation as resolved.
Show resolved Hide resolved
# Path filter: only changes to the lockfile or to files under
# lib/ruby_indexer/ are listed here.
paths:
- 'Gemfile.lock'
- 'lib/ruby_indexer/**'
# The pull_request event lists the same two path patterns as above.
pull_request:
paths:
- 'Gemfile.lock'
- 'lib/ruby_indexer/**'
andyw8 marked this conversation as resolved.
Show resolved Hide resolved

jobs:
# Single job: check out the repository, install Ruby and the bundle, then run
# the indexing rake task.
indexing_sanity_check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Ruby
uses: ruby/setup-ruby@v1
with:
# Have the setup action install and cache the bundle.
bundler-cache: true

# Runs the rake task named index:topgems.
- name: Index Top 100 Ruby gems
run: bundle exec rake index:topgems
95 changes: 95 additions & 0 deletions rakelib/index.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# frozen_string_literal: true

# Based on https://github.com/ruby/prism/blob/main/rakelib/lex.rake

module GemIndexing
  class << self
    # Runs the given block once for every element of +items+, spreading the
    # work across a pool of threads, and blocks until every item has been
    # processed. This is particularly useful for tasks that are IO-bound like
    # downloading files or reading files from disk.
    #
    # The pool size is read from the WORKERS environment variable and defaults
    # to 16.
    #
    # @param items [#each] the items to process
    # @yieldparam item [Object] one element of +items+
    # @return [Array<Thread>] the joined worker threads
    def parallelize(items, &block)
      # An exception in any worker should abort the whole run instead of being
      # lost. Note that this flips a process-wide flag.
      Thread.abort_on_exception = true

      queue = Queue.new
      items.each { |item| queue << item }

      workers =
        ENV.fetch("WORKERS") { 16 }.to_i.times.map do
          parallelize_thread(queue, &block)
        end

      workers.map(&:join)
    end

    private

    # Create a new thread with a minimal number of locals that it can access.
    #
    # Items are taken with a non-blocking pop. Checking `queue.empty?` and then
    # calling a blocking `shift` is a check-then-act race: another worker can
    # take the last item in between, leaving this worker blocked forever on an
    # empty queue.
    def parallelize_thread(queue, &block)
      Thread.new do
        loop do
          item = queue.pop(true)
        rescue ThreadError
          # Raised by the non-blocking pop once the queue has been drained.
          break
        else
          # Run outside the rescue so a ThreadError raised by the block itself
          # is not mistaken for "queue is empty".
          block.call(item)
        end
      end
    end
  end
end

# YAML file listing the gems (as "name-version" entries) to download and index.
TOP_100_GEM_FILENAME = File.join("rakelib", "top_100_gems.yml").freeze
# Scratch directory the gems' Ruby files are extracted into.
TOP_100_GEMS_DIR = File.join("tmp", "top_100_gems").freeze

namespace :download do
  # File task that creates the extraction directory when it does not exist.
  directory TOP_100_GEMS_DIR

  # Downloads every gem listed in TOP_100_GEM_FILENAME from rubygems.org and
  # extracts its Ruby files into TOP_100_GEMS_DIR/<gem_name>. Gems whose
  # directory already exists are skipped. Raises if a download does not return
  # a successful HTTP response.
  desc "Download the top 100 rubygems under #{TOP_100_GEMS_DIR}/"
  task topgems: TOP_100_GEMS_DIR do
    $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
    require "net/http"
    require "rubygems/package"
    require "tmpdir"
    require "yaml"

    GemIndexing.parallelize(YAML.safe_load_file(TOP_100_GEM_FILENAME)) do |gem_name|
      directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")
      # Already extracted by a previous run; nothing to do for this gem.
      next if File.directory?(directory)

      puts "Downloading #{gem_name}"

      uri = URI.parse("https://rubygems.org/gems/#{gem_name}.gem")
      response = Net::HTTP.get_response(uri)
      unless response.is_a?(Net::HTTPSuccess)
        raise "Failed to download #{gem_name}: #{response.code} #{response.message}"
      end

      Dir.mktmpdir do |tmpdir|
        filepath = File.join(tmpdir, "#{gem_name}.gem")
        # A .gem file is a binary archive, so write it without any text-mode
        # newline or encoding conversion.
        File.binwrite(filepath, response.body)
        # Only the Ruby sources are extracted.
        Gem::Package.new(filepath).extract_files(directory, "**/*.rb")
      end
    end
  end
end

# This task indexes every Ruby file of the top 100 gems, and will exit(1) if
# any file fails to index. The downloaded gems are removed afterwards whether
# or not indexing succeeded.
desc "Index against the top 100 rubygems"
task "index:topgems": ["download:topgems"] do
  $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
  require "net/http"
  require "rubygems/package"
  require "tmpdir"
  require "yaml"

  gem_names = YAML.safe_load_file(TOP_100_GEM_FILENAME)

  # Workers run concurrently, so failures are collected in a thread-safe queue.
  # The workers only ever append to it; they never reassign it, so no failure
  # recorded by one gem can be overwritten by another.
  errors = Queue.new
  GemIndexing.parallelize(gem_names) do |gem_name|
    directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")

    # One index per gem.
    index = RubyIndexer::Index.new

    Dir[File.join(directory, "**", "*.rb")].each do |filepath|
      print(".")
      code = File.read(filepath)
      index.index_single(RubyIndexer::IndexablePath.new(nil, filepath), code)
    rescue => e
      # Record the failure and keep going so that one run reports every
      # problematic file.
      errors << { message: e.message, file: filepath }
    end
  end

  unless errors.empty?
    failures = Array.new(errors.size) { errors.pop }
    puts "errors: #{failures}"
    # Fail the task (and therefore CI). The ensure below still runs.
    exit(1)
  end
ensure
  FileUtils.rm_rf(TOP_100_GEMS_DIR)
end
101 changes: 101 additions & 0 deletions rakelib/top_100_gems.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
---
- actioncable-7.0.4.3
- actionmailbox-7.0.4.3
- actionmailer-7.0.4.3
- actionpack-7.0.4.3
- actiontext-7.0.4.3
- actionview-7.0.4.3
- activejob-7.0.4.3
- activemodel-7.0.4.3
- activerecord-7.0.4.3
- activestorage-7.0.4.3
- activesupport-7.0.4.3
- addressable-2.8.4
- autoprefixer-rails-10.4.13.0
- aws-partitions-1.744.0
- aws-sdk-cloudformation-1.77.0
- aws-sdk-cloudfront-1.76.0
- aws-sdk-cloudwatch-1.72.0
- aws-sdk-core-3.171.0
- aws-sdk-dynamodb-1.83.0
- aws-sdk-ec2-1.375.0
- aws-sdk-iam-1.77.0
- aws-sdk-kinesis-1.45.0
- aws-sdk-kms-1.63.0
- aws-sdk-lambda-1.93.0
- aws-sdk-rds-1.175.0
- aws-sdk-resources-3.162.0
- aws-sdk-s3-1.120.1
- aws-sdk-secretsmanager-1.73.0
- aws-sdk-sns-1.60.0
- aws-sdk-ssm-1.150.0
- backports-3.24.1
- brakeman-5.4.1
- bundler-2.4.11
- capybara-3.39.0
- concurrent-ruby-1.2.2
- connection_pool-2.4.0
- dalli-3.2.4
- database_cleaner-2.0.2
- devise-4.9.2
- dry-types-1.7.1
- elasticsearch-8.7.0
- elasticsearch-api-8.7.0
- excon-0.99.0
- faker-3.1.1
- faraday-retry-2.1.0
- fastlane-2.212.1
- fog-aws-3.18.0
- git-1.18.0
- google-cloud-errors-1.3.1
- google-protobuf-3.22.2
- googleauth-1.5.1
- graphql-2.0.21
- grpc-1.53.0
- jwt-2.7.0
- loofah-2.20.0
- mail-2.8.1
- mime-types-data-3.2023.0218.1
- minitest-5.18.0
- msgpack-1.7.0
- net-http-persistent-4.0.2
- net-ssh-7.1.0
- newrelic_rpm-9.1.0
- nio4r-2.5.9
- nokogiri-1.14.3
- octokit-6.1.1
- oj-3.14.3
- parser-3.2.2.0
- pg-1.4.6
- plist-3.7.0
- puma-6.2.1
- rack-3.0.7
- rack-cors-2.0.1
- rack-protection-3.0.6
- rack-test-2.1.0
- rails-7.0.4.3
- railties-7.0.4.3
- raindrops-0.20.1
- redis-store-1.9.2
- regexp_parser-2.7.0
- responders-3.1.0
- rouge-4.1.0
- rspec-core-3.12.1
- rspec-mocks-3.12.5
- rubocop-1.50.0
- rubocop-ast-1.28.0
- rubocop-performance-1.17.1
- rubocop-rails-2.19.0
- rubocop-rspec-2.19.0
- ruby-progressbar-1.13.0
- ruby_parser-3.20.0
- rubygems-update-3.4.11
- selenium-webdriver-4.8.6
- sidekiq-7.0.8
- sinatra-3.0.6
- slop-4.10.1
- sqlite3-1.6.2
- thin-1.8.2
- tilt-2.1.0
- yard-0.9.32
- zeitwerk-2.6.7
1 change: 1 addition & 0 deletions sorbet/config
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
--dir
.
--ignore=vendor/
--ignore=tmp/
andyw8 marked this conversation as resolved.
Show resolved Hide resolved
--ignore=test/fixtures/
--ignore=test/expectations/
--enable-experimental-requires-ancestor
Loading