From 22f7e1854f7111f62f082026a063cd4df5922572 Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Mon, 9 Sep 2024 18:55:19 +0200 Subject: [PATCH] Migrate to Solid Queue 0.8.x and extract tables With Solid Queue 0.8.0, support for dedicated databases was introduced. In fact, Rails recommends moving the database, which we perform with the migration. Unfortunately, it is currently unclear how exactly schema migrations will be provided. Therefore, we cannot use the same GitHub Actions workflow for now and will re-add it once a new migration has been published. --- .github/workflows/ci.yml | 13 +- .rubocop.yml | 2 +- Gemfile.lock | 2 +- config/database.yml.example | 18 ++- config/environments/development.rb | 1 + config/environments/production.rb | 1 + config/solid_queue.yml | 24 +-- ..._move_solid_queue_to_dedicated_database.rb | 93 +++++++++++ db/queue_schema.rb | 144 ++++++++++++++++++ db/schema.rb | 127 --------------- db/scripts/copy_data.rb | 108 +++++++++++++ db/seeds.rb | 3 + docs/LOCAL_SETUP.md | 8 +- docs/backup.md | 4 +- spec/db/seeds_spec.rb | 4 +- 15 files changed, 388 insertions(+), 164 deletions(-) create mode 100644 db/migrate/20240907100647_move_solid_queue_to_dedicated_database.rb create mode 100644 db/queue_schema.rb create mode 100644 db/scripts/copy_data.rb diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43f1e264f..356ae2a5e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,19 +68,10 @@ jobs: cp config/secrets.yml.ci config/secrets.yml cp config/mnemosyne.yml.ci config/mnemosyne.yml - - name: Create database + - name: Prepare database env: RAILS_ENV: test - run: bundler exec rake db:schema:load - - name: Temporarily create pending SolidQueue migrations - # Dependabot might update the SolidQueue gem, which might include new migrations. - # However, Dependabot won't add the migrations nor run SolidQueue at all. - # Consequently, all tests would still pass and the dependency update would be merged. 
- # To prevent this, we temporarily create the pending migrations *after* the schema has been loaded. - # If a new migration was added, a `PendingMigrationError` will be raised and the CI will fail. - env: - RAILS_ENV: test - run: bundler exec rails solid_queue:install:migrations + run: bundler exec rake db:prepare - name: Precompile assets env: RAILS_ENV: test diff --git a/.rubocop.yml b/.rubocop.yml index 20989103c..db56e2aea 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -23,7 +23,7 @@ AllCops: NewCops: enable Exclude: - 'bin/*' - - 'db/schema.rb' + - 'db/*schema.rb' - 'vendor/**/*' # Ignore local files for faster processing - 'tmp/**/*' diff --git a/Gemfile.lock b/Gemfile.lock index bca0d2836..7358df04c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -534,7 +534,7 @@ GEM rexml (~> 3.2) rubocop (>= 1.0, < 2.0) slim (>= 3.0, < 6.0) - solid_queue (0.7.1) + solid_queue (0.8.2) activejob (>= 7.1) activerecord (>= 7.1) concurrent-ruby (>= 1.3.1) diff --git a/config/database.yml.example b/config/database.yml.example index 280cf978b..d000098cf 100644 --- a/config/database.yml.example +++ b/config/database.yml.example @@ -6,12 +6,22 @@ default: &default # host: localhost development: - <<: *default - database: codeharbor_development + primary: &primary_development + <<: *default + database: codeharbor_development + queue: + <<: *primary_development + database: codeharbor_development_queue + migrations_paths: db/queue_migrate production: - <<: *default - database: codeharbor_production + primary: &primary_production + <<: *default + database: codeharbor_production + queue: + <<: *primary_production + database: codeharbor_production_queue + migrations_paths: db/queue_migrate # Warning: The database defined as "test" will be erased and # re-generated from your development database when you run "rake". 
diff --git a/config/environments/development.rb b/config/environments/development.rb index cda0c98b1..71b9e2d69 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -46,6 +46,7 @@ # Use a real queuing backend for Active Job (and separate queues per environment). config.active_job.queue_adapter = :solid_queue + config.solid_queue.connects_to = {database: {writing: :queue}} config.active_job.queue_name_prefix = 'codeharbor_development' config.action_mailer.perform_caching = false diff --git a/config/environments/production.rb b/config/environments/production.rb index a5249631a..b2fe47aa5 100644 --- a/config/environments/production.rb +++ b/config/environments/production.rb @@ -78,6 +78,7 @@ # Use a real queuing backend for Active Job (and separate queues per environment). config.active_job.queue_adapter = :solid_queue + config.solid_queue.connects_to = {database: {writing: :queue}} config.active_job.queue_name_prefix = 'codeharbor_production' config.action_mailer.perform_caching = false diff --git a/config/solid_queue.yml b/config/solid_queue.yml index 73dd19e0c..28f7a9ace 100644 --- a/config/solid_queue.yml +++ b/config/solid_queue.yml @@ -1,20 +1,20 @@ - default: &default - dispatchers: - - polling_interval: 1 - batch_size: 500 - workers: - - queues: "*" - threads: 3 - processes: 1 - polling_interval: 0.1 +default: &default + dispatchers: + - polling_interval: 1 + batch_size: 500 + workers: + - queues: "*" + threads: 3 + processes: <%= ENV.fetch("JOB_CONCURRENCY", 1) %> + polling_interval: 0.1 - development: +development: <<: *default - test: +test: <<: *default - production: +production: <<: *default dispatchers: - polling_interval: 1 diff --git a/db/migrate/20240907100647_move_solid_queue_to_dedicated_database.rb b/db/migrate/20240907100647_move_solid_queue_to_dedicated_database.rb new file mode 100644 index 000000000..529ac45ed --- /dev/null +++ b/db/migrate/20240907100647_move_solid_queue_to_dedicated_database.rb @@ -0,0 
+1,93 @@ +# frozen_string_literal: true + +require_relative '../scripts/copy_data' + +class MoveSolidQueueToDedicatedDatabase < ActiveRecord::Migration[7.1] + include CopyData + include ActiveRecord::Tasks + + class Queue < ApplicationRecord + self.abstract_class = true + connects_to database: {writing: :queue} + end + + def up + create_database(:queue) + load_queue_schema(database: :queue) + copy_data(source_connection: connection, target_connection: Queue.connection, operation: :matches, condition: 'solid_queue_%') + drop_queue_tables(connection:) + ensure + Queue.connection&.disconnect! + end + + def down + load_queue_schema(database: :primary) + copy_data(source_connection: Queue.connection, target_connection: connection, operation: :matches, condition: 'solid_queue_%') + drop_queue_tables(connection: Queue.connection) + rescue StandardError + Queue.connection&.disconnect! + else + Queue.connection&.disconnect! + drop_database(:queue) + end + + def foreign_key_targets + %w[ + solid_queue_jobs + ] + end + + private + + def load_queue_schema(database:) + with_connection(database:) do + DatabaseTasks.load_schema(configs_for(:queue), ActiveRecord.schema_format, 'db/queue_schema.rb') + end + end + + def drop_queue_tables(connection:) + connection.drop_table :solid_queue_semaphores + connection.drop_table :solid_queue_scheduled_executions + connection.drop_table :solid_queue_recurring_tasks + connection.drop_table :solid_queue_recurring_executions + connection.drop_table :solid_queue_ready_executions + connection.drop_table :solid_queue_processes + connection.drop_table :solid_queue_pauses + connection.drop_table :solid_queue_failed_executions + connection.drop_table :solid_queue_claimed_executions + connection.drop_table :solid_queue_blocked_executions + connection.drop_table :solid_queue_jobs + end + + def configs_for(name) + ActiveRecord::Base.configurations.configs_for(name: name.to_s, env_name: Rails.env) + end + + def create_database(name) + database_name = 
configs_for(name).database + new_primary_connection.create_database(database_name) + rescue ActiveRecord::DatabaseAlreadyExists + # Database already exists, do nothing + end + + def drop_database(name) + database_name = configs_for(name).database + new_primary_connection.drop_database(database_name) + end + + def new_primary_connection + ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.new(configs_for(:primary).configuration_hash) + end + + def with_connection(database:) + # We must not overwrite the `connection`, which is automatically overwritten by establishing a new connection. + # However, we need to specify another connection, i.e. for loading the schema to the desired database. + # Hence, we use this monkey patching workaround to change the connection temporarily and then revert back. + klass = database == :queue ? Queue : ApplicationRecord + DatabaseTasks.alias_method :previous_migration_class, :migration_class + DatabaseTasks.define_method(:migration_class) { klass } + yield + ensure + DatabaseTasks.alias_method :migration_class, :previous_migration_class + end +end diff --git a/db/queue_schema.rb b/db/queue_schema.rb new file mode 100644 index 000000000..c45343ad6 --- /dev/null +++ b/db/queue_schema.rb @@ -0,0 +1,144 @@ +# This file is auto-generated from the current state of the database. Instead +# of editing this file, please use the migrations feature of Active Record to +# incrementally modify your database, and then regenerate this schema definition. +# +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. +# +# It's strongly recommended that you check this file into your version control system. 
+ +ActiveRecord::Schema[7.1].define(version: 2024_09_04_193154) do + # These are extensions that must be enabled in order to support this database + enable_extension "plpgsql" + + create_table "solid_queue_blocked_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "queue_name", null: false + t.integer "priority", default: 0, null: false + t.string "concurrency_key", null: false + t.datetime "expires_at", null: false + t.datetime "created_at", null: false + t.index ["concurrency_key", "priority", "job_id"], name: "index_solid_queue_blocked_executions_for_release" + t.index ["expires_at", "concurrency_key"], name: "index_solid_queue_blocked_executions_for_maintenance" + t.index ["job_id"], name: "index_solid_queue_blocked_executions_on_job_id", unique: true + end + + create_table "solid_queue_claimed_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.bigint "process_id" + t.datetime "created_at", null: false + t.index ["job_id"], name: "index_solid_queue_claimed_executions_on_job_id", unique: true + t.index ["process_id", "job_id"], name: "index_solid_queue_claimed_executions_on_process_id_and_job_id" + end + + create_table "solid_queue_failed_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.text "error" + t.datetime "created_at", null: false + t.index ["job_id"], name: "index_solid_queue_failed_executions_on_job_id", unique: true + end + + create_table "solid_queue_jobs", force: :cascade do |t| + t.string "queue_name", null: false + t.string "class_name", null: false + t.text "arguments" + t.integer "priority", default: 0, null: false + t.string "active_job_id" + t.datetime "scheduled_at" + t.datetime "finished_at" + t.string "concurrency_key" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["active_job_id"], name: "index_solid_queue_jobs_on_active_job_id" + t.index ["class_name"], name: "index_solid_queue_jobs_on_class_name" + t.index ["finished_at"], 
name: "index_solid_queue_jobs_on_finished_at" + t.index ["queue_name", "finished_at"], name: "index_solid_queue_jobs_for_filtering" + t.index ["scheduled_at", "finished_at"], name: "index_solid_queue_jobs_for_alerting" + end + + create_table "solid_queue_pauses", force: :cascade do |t| + t.string "queue_name", null: false + t.datetime "created_at", null: false + t.index ["queue_name"], name: "index_solid_queue_pauses_on_queue_name", unique: true + end + + create_table "solid_queue_processes", force: :cascade do |t| + t.string "kind", null: false + t.datetime "last_heartbeat_at", null: false + t.bigint "supervisor_id" + t.integer "pid", null: false + t.string "hostname" + t.text "metadata" + t.datetime "created_at", null: false + t.string "name", null: false + t.index ["last_heartbeat_at"], name: "index_solid_queue_processes_on_last_heartbeat_at" + t.index ["name", "supervisor_id"], name: "index_solid_queue_processes_on_name_and_supervisor_id", unique: true + t.index ["supervisor_id"], name: "index_solid_queue_processes_on_supervisor_id" + end + + create_table "solid_queue_ready_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "queue_name", null: false + t.integer "priority", default: 0, null: false + t.datetime "created_at", null: false + t.index ["job_id"], name: "index_solid_queue_ready_executions_on_job_id", unique: true + t.index ["priority", "job_id"], name: "index_solid_queue_poll_all" + t.index ["queue_name", "priority", "job_id"], name: "index_solid_queue_poll_by_queue" + end + + create_table "solid_queue_recurring_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "task_key", null: false + t.datetime "run_at", null: false + t.datetime "created_at", null: false + t.index ["job_id"], name: "index_solid_queue_recurring_executions_on_job_id", unique: true + t.index ["task_key", "run_at"], name: "index_solid_queue_recurring_executions_on_task_key_and_run_at", unique: true + end + + create_table 
"solid_queue_recurring_tasks", force: :cascade do |t| + t.string "key", null: false + t.string "schedule", null: false + t.string "command", limit: 2048 + t.string "class_name" + t.text "arguments" + t.string "queue_name" + t.integer "priority", default: 0 + t.boolean "static", default: true, null: false + t.text "description" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["key"], name: "index_solid_queue_recurring_tasks_on_key", unique: true + t.index ["static"], name: "index_solid_queue_recurring_tasks_on_static" + end + + create_table "solid_queue_scheduled_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "queue_name", null: false + t.integer "priority", default: 0, null: false + t.datetime "scheduled_at", null: false + t.datetime "created_at", null: false + t.index ["job_id"], name: "index_solid_queue_scheduled_executions_on_job_id", unique: true + t.index ["scheduled_at", "priority", "job_id"], name: "index_solid_queue_dispatch_all" + end + + create_table "solid_queue_semaphores", force: :cascade do |t| + t.string "key", null: false + t.integer "value", default: 1, null: false + t.datetime "expires_at", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["expires_at"], name: "index_solid_queue_semaphores_on_expires_at" + t.index ["key", "value"], name: "index_solid_queue_semaphores_on_key_and_value" + t.index ["key"], name: "index_solid_queue_semaphores_on_key", unique: true + end + + add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade + add_foreign_key "solid_queue_claimed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade + add_foreign_key "solid_queue_failed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade + add_foreign_key "solid_queue_ready_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade + add_foreign_key 
"solid_queue_recurring_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade + add_foreign_key "solid_queue_scheduled_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade +end diff --git a/db/schema.rb b/db/schema.rb index 1d77aa345..327ed1af6 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -209,127 +209,6 @@ t.index ["user_id"], name: "index_reports_on_user_id" end - create_table "solid_queue_blocked_executions", force: :cascade do |t| - t.bigint "job_id", null: false - t.string "queue_name", null: false - t.integer "priority", default: 0, null: false - t.string "concurrency_key", null: false - t.datetime "expires_at", null: false - t.datetime "created_at", null: false - t.index ["concurrency_key", "priority", "job_id"], name: "index_solid_queue_blocked_executions_for_release" - t.index ["expires_at", "concurrency_key"], name: "index_solid_queue_blocked_executions_for_maintenance" - t.index ["job_id"], name: "index_solid_queue_blocked_executions_on_job_id", unique: true - end - - create_table "solid_queue_claimed_executions", force: :cascade do |t| - t.bigint "job_id", null: false - t.bigint "process_id" - t.datetime "created_at", null: false - t.index ["job_id"], name: "index_solid_queue_claimed_executions_on_job_id", unique: true - t.index ["process_id", "job_id"], name: "index_solid_queue_claimed_executions_on_process_id_and_job_id" - end - - create_table "solid_queue_failed_executions", force: :cascade do |t| - t.bigint "job_id", null: false - t.text "error" - t.datetime "created_at", null: false - t.index ["job_id"], name: "index_solid_queue_failed_executions_on_job_id", unique: true - end - - create_table "solid_queue_jobs", force: :cascade do |t| - t.string "queue_name", null: false - t.string "class_name", null: false - t.text "arguments" - t.integer "priority", default: 0, null: false - t.string "active_job_id" - t.datetime "scheduled_at" - t.datetime "finished_at" - t.string "concurrency_key" - t.datetime "created_at", 
null: false - t.datetime "updated_at", null: false - t.index ["active_job_id"], name: "index_solid_queue_jobs_on_active_job_id" - t.index ["class_name"], name: "index_solid_queue_jobs_on_class_name" - t.index ["finished_at"], name: "index_solid_queue_jobs_on_finished_at" - t.index ["queue_name", "finished_at"], name: "index_solid_queue_jobs_for_filtering" - t.index ["scheduled_at", "finished_at"], name: "index_solid_queue_jobs_for_alerting" - end - - create_table "solid_queue_pauses", force: :cascade do |t| - t.string "queue_name", null: false - t.datetime "created_at", null: false - t.index ["queue_name"], name: "index_solid_queue_pauses_on_queue_name", unique: true - end - - create_table "solid_queue_processes", force: :cascade do |t| - t.string "kind", null: false - t.datetime "last_heartbeat_at", null: false - t.bigint "supervisor_id" - t.integer "pid", null: false - t.string "hostname" - t.text "metadata" - t.datetime "created_at", null: false - t.string "name", null: false - t.index ["last_heartbeat_at"], name: "index_solid_queue_processes_on_last_heartbeat_at" - t.index ["name", "supervisor_id"], name: "index_solid_queue_processes_on_name_and_supervisor_id", unique: true - t.index ["supervisor_id"], name: "index_solid_queue_processes_on_supervisor_id" - end - - create_table "solid_queue_ready_executions", force: :cascade do |t| - t.bigint "job_id", null: false - t.string "queue_name", null: false - t.integer "priority", default: 0, null: false - t.datetime "created_at", null: false - t.index ["job_id"], name: "index_solid_queue_ready_executions_on_job_id", unique: true - t.index ["priority", "job_id"], name: "index_solid_queue_poll_all" - t.index ["queue_name", "priority", "job_id"], name: "index_solid_queue_poll_by_queue" - end - - create_table "solid_queue_recurring_executions", force: :cascade do |t| - t.bigint "job_id", null: false - t.string "task_key", null: false - t.datetime "run_at", null: false - t.datetime "created_at", null: false - t.index 
["job_id"], name: "index_solid_queue_recurring_executions_on_job_id", unique: true - t.index ["task_key", "run_at"], name: "index_solid_queue_recurring_executions_on_task_key_and_run_at", unique: true - end - - create_table "solid_queue_recurring_tasks", force: :cascade do |t| - t.string "key", null: false - t.string "schedule", null: false - t.string "command", limit: 2048 - t.string "class_name" - t.text "arguments" - t.string "queue_name" - t.integer "priority", default: 0 - t.boolean "static", default: true, null: false - t.text "description" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.index ["key"], name: "index_solid_queue_recurring_tasks_on_key", unique: true - t.index ["static"], name: "index_solid_queue_recurring_tasks_on_static" - end - - create_table "solid_queue_scheduled_executions", force: :cascade do |t| - t.bigint "job_id", null: false - t.string "queue_name", null: false - t.integer "priority", default: 0, null: false - t.datetime "scheduled_at", null: false - t.datetime "created_at", null: false - t.index ["job_id"], name: "index_solid_queue_scheduled_executions_on_job_id", unique: true - t.index ["scheduled_at", "priority", "job_id"], name: "index_solid_queue_dispatch_all" - end - - create_table "solid_queue_semaphores", force: :cascade do |t| - t.string "key", null: false - t.integer "value", default: 1, null: false - t.datetime "expires_at", null: false - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.index ["expires_at"], name: "index_solid_queue_semaphores_on_expires_at" - t.index ["key", "value"], name: "index_solid_queue_semaphores_on_key_and_value" - t.index ["key"], name: "index_solid_queue_semaphores_on_key", unique: true - end - create_table "taggings", id: :serial, force: :cascade do |t| t.integer "tag_id" t.string "taggable_type" @@ -495,12 +374,6 @@ add_foreign_key "ratings", "users" add_foreign_key "reports", "tasks" add_foreign_key "reports", "users" - 
add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade - add_foreign_key "solid_queue_claimed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade - add_foreign_key "solid_queue_failed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade - add_foreign_key "solid_queue_ready_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade - add_foreign_key "solid_queue_recurring_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade - add_foreign_key "solid_queue_scheduled_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "task_contributions", "tasks" add_foreign_key "task_files", "task_files", column: "parent_id", on_delete: :nullify add_foreign_key "task_labels", "tasks" diff --git a/db/scripts/copy_data.rb b/db/scripts/copy_data.rb new file mode 100644 index 000000000..55761745e --- /dev/null +++ b/db/scripts/copy_data.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +module CopyData + BATCH_SIZE = 500 + + def copy_data(source_connection:, target_connection:, operation:, condition:) + tables = tables(connection: source_connection, operation:, condition:) + sequences = sequences(connection: source_connection, operation:, condition:) + views = views(connection: source_connection, operation:, condition:) + + source_connection.transaction do + target_connection.transaction do + copy_tables(tables, source_connection:, target_connection:) + copy_sequences(sequences, source_connection:, target_connection:) + copy_views(views, source_connection:, target_connection:) + end + end + end + + # Some tables or views might be referenced by other tables or views. + # Those need to be handled first when copying data or last when removing columns. + def foreign_key_targets + [] + end + + private + + def copy_tables(tables, source_connection:, target_connection:) + return unless tables.any? 
+ + # Lock tables to prevent new data from being added + # As soon as the transaction is committed, the lock is released automatically + quoted_table_names = tables.map {|table| source_connection.quote_table_name(table) }.join(', ') + source_connection.execute("LOCK #{quoted_table_names} IN ACCESS EXCLUSIVE MODE") + + # Copy tables by fetching and inserting records in batches + tables.each do |table| + arel_table = Arel::Table.new(table) + offset = 0 + + loop do + select_manager = arel_table.project(Arel.star).take(BATCH_SIZE).skip(offset) + records = source_connection.execute(select_manager.to_sql) + break if records.ntuples.zero? + + insert_manager = Arel::InsertManager.new + insert_manager.into(arel_table) + insert_manager.columns.concat(records.fields.map {|field| arel_table[field] }) + insert_manager.values = insert_manager.create_values_list(records.values) + target_connection.execute(insert_manager.to_sql) + + offset += BATCH_SIZE + end + end + end + + def copy_sequences(sequences, source_connection:, target_connection:) + sequences.each do |sequence| + sequence_table = Arel::Table.new(sequence) + select_manager = sequence_table.project(sequence_table[:last_value]) + max_value = source_connection.execute(select_manager.to_sql).first['last_value'] + # Set sequence to the *next* value. 
+ target_connection.execute("ALTER SEQUENCE #{target_connection.quote_table_name(sequence)} RESTART WITH #{max_value + 1}") + end + end + + def copy_views(views, source_connection:, target_connection:) + pg_views = Arel::Table.new('pg_views') + + views.each do |view| + select_manager = pg_views.project(pg_views[:definition]).where(pg_views[:schemaname].eq('public').and(pg_views[:viewname].eq(view))) + definition = source_connection.execute(select_manager.to_sql).first['definition'] + target_connection.execute("CREATE VIEW #{target_connection.quote_table_name(view)} AS #{definition}") + end + end + + def tables(connection:, operation: :matches, condition: '%') + tables = query_for(connection:, kind: :table, operation:, condition:) + tables.sort_by {|element| foreign_key_targets.index(element) || tables.size } + end + + def sequences(connection:, operation: :matches, condition: '%') + query_for(connection:, kind: :sequence, operation:, condition:) + end + + def views(connection:, operation: :matches, condition: '%') + query_for(connection:, kind: :view, operation:, condition:) + end + + # @param connection [ActiveRecord::ConnectionAdapters::PostgreSQLAdapter] + # @param kind [Symbol] :table, :sequence, or :view + # @param operation [Symbol] :matches, :does_not_match, :eq, :not_eq, :lt, :lteq, :gt, :gteq + # @param condition [String] The condition to match + # @return [Array] The names of the tables, sequences, or views that match the condition + def query_for(connection:, kind:, operation:, condition:) + table_name = :"information_schema.#{kind}s" + column_name = :"#{kind == :view ? :table : kind}_name" + schema_name = :"#{kind == :view ? 
:table : kind}_schema" + + information_schema = Arel::Table.new(table_name) + + query = information_schema + .project(information_schema[column_name]) + .where(information_schema[schema_name].eq('public').and(information_schema[column_name].public_send(operation, condition))) + + connection.execute(query.to_sql).pluck(column_name.to_s) + end +end diff --git a/db/seeds.rb b/db/seeds.rb index d2f636db5..4894984e4 100644 --- a/db/seeds.rb +++ b/db/seeds.rb @@ -8,6 +8,9 @@ # * development: Only needed for local development # +# Disable seeding if there are already users in the database +return if User.any? + ['all', Rails.env].each do |seed| seed_file = Rails.root.join("db/seeds/#{seed}.rb") if seed_file.exist? diff --git a/docs/LOCAL_SETUP.md b/docs/LOCAL_SETUP.md index 5ac404d7d..76ec0c645 100644 --- a/docs/LOCAL_SETUP.md +++ b/docs/LOCAL_SETUP.md @@ -1,6 +1,6 @@ # Local Setup CodeHarbor -CodeHarbor consists of a web application that is connected to a PostgreSQL database. The following document will guide you through the setup of CodeHarbor with all aforementioned components. +CodeHarbor consists of a web application that is connected to multiple PostgreSQL databases. The following document will guide you through the setup of CodeHarbor with all aforementioned components. We recommend using the **native setup** as described below. We also prepared a setup with Vagrant using a virtual machine as [described in this guide](./LOCAL_SETUP_VAGRANT.md). However, the Vagrant setup might be outdated and is not actively maintained (PRs are welcome though!) @@ -171,12 +171,12 @@ bundle install yarn install ``` -### Initialize the database +### Initialize the databases -The following command will create a database for the development and test environments, setup tables, and load seed data. +The following command will create the necessary databases for the development and test environments, setup tables, and load seed data. 
```shell -rake db:setup +rake db:prepare ``` ### Start CodeHarbor diff --git a/docs/backup.md b/docs/backup.md index 818fbed54..817d86555 100644 --- a/docs/backup.md +++ b/docs/backup.md @@ -1,6 +1,6 @@ # Backups -CodeHarbor persists most data in the PostgreSQL database including tasks and the respective user files. Only files uploaded to ActiveStorage are located in the `storage` folder. +CodeHarbor persists most data in the PostgreSQL databases including tasks and the respective user files. Only files uploaded to ActiveStorage are located in the `storage` folder. ## Summary @@ -8,5 +8,5 @@ In order to back up CodeHarbor, you should consider these locations: - `config/*.yml` - The values of [environment variables](environment_variables.md) set for the web server -- PostgreSQL database as specified in `config/database.yml` +- PostgreSQL databases as specified in `config/database.yml` - `storage` and all sub-folders diff --git a/spec/db/seeds_spec.rb b/spec/db/seeds_spec.rb index 5b64746cd..bfba6a776 100644 --- a/spec/db/seeds_spec.rb +++ b/spec/db/seeds_spec.rb @@ -8,10 +8,10 @@ before do Rails.application.load_tasks if Rake::Task.tasks.empty? - # We need to migrate the test database before seeding + # We need to prepare the test database before seeding # Otherwise, Rails 7.1+ will throw an `NoMethodError`: `pending_migrations.any?` # See ActiveRecord gem, file `lib/active_record/railties/databases.rake` - Rake::Task['db:migrate'].invoke + Rake::Task['db:prepare'].invoke # We want to execute the seeds for the dev environment against the test database # rubocop:disable Rails/Inquiry