# Reviewer summary of this patch. The diff text that follows these notes is unchanged.
#
# NOTE(review): the line structure of this patch appears to have been collapsed
# (many diff lines share one physical line); it presumably needs its original
# line breaks restored before `git apply` / `patch` can consume it.
#
# env.development
#   * Renames the SOLR_USERNAME variable to SOLR_USER; the value stays `solr`.
#
# lib/authority_browse.rb
#   * Adds `require "concurrent"`. Nothing else in this patch references it
#     (TODO confirm where it is used).
#
# lib/authority_browse/db/names.rb
#   * Removes the inline `index: true` options and the primary-key declarations
#     from the :names and :names_see_also table definitions, and removes
#     `primary_key :id` from :names_from_biblio.
#   * Adds `set_names_indexes!`, which uses `AuthorityBrowse.db.alter_table` to add
#     indexes on names(id, match_text, deprecated, count) and on
#     names_see_also(name_id, see_also_id).
#   * NOTE(review): names.id changes from `primary_key: true` to a plain
#     `add_index :id`; presumably uniqueness of id is no longer enforced by the
#     schema -- confirm that this is intended.
#
# lib/authority_browse/names.rb
#   * reset_db: renames the Milemarker and its start message, drops a commented-out
#     Zinzout line, and, after `milemarker.log_final_line`, calls
#     `set_names_indexes!` before
#     `remove_deprecated_when_undeprecated_match_text_exists`; both calls are
#     wrapped in `S.logger.measure_info`.
#   * update: logs a start message before each step and wraps the three
#     DBMutator::Names calls in `S.logger.measure_info`.
#   * NOTE(review): the measure_info label "undprecated" looks like a typo for
#     "undeprecated".
#   * NOTE(review): the Milemarker is given `Services.logger` while the new lines
#     use `S.logger`; presumably `S` is an alias of `Services` -- confirm.
#
# spec/authority_browse/db/names_spec.rb
#   * Adds a spec that expects `AuthorityBrowse.db.indexes` for :names and
#     :names_see_also to equal {} before `set_names_indexes!` and to differ from {}
#     afterwards.
#   * NOTE(review): the "before" expectation presumably relies on the tables being
#     recreated without indexes by a `before` hook outside this hunk -- confirm.
#
diff --git a/env.development b/env.development index b5314ffe..5b044f2f 100644 --- a/env.development +++ b/env.development @@ -1,6 +1,6 @@ SOLR_HOST="http://solr:8983" SOLR_PASSWORD=SolrRocks -SOLR_USERNAME=solr +SOLR_USER=solr SOLR_CONFIGURATION="authority_browse" SOLR_COLLECTION="authority_browse" MARIADB_ROOT_PASSWORD=password diff --git a/lib/authority_browse.rb b/lib/authority_browse.rb index 0850363e..39a270b7 100644 --- a/lib/authority_browse.rb +++ b/lib/authority_browse.rb @@ -5,6 +5,7 @@ require "logger" require "byebug" require "services" +require "concurrent" module AuthorityBrowse IS_JRUBY = (RUBY_ENGINE == "jruby") diff --git a/lib/authority_browse/db/names.rb b/lib/authority_browse/db/names.rb index 8dc81506..9187ac24 100644 --- a/lib/authority_browse/db/names.rb +++ b/lib/authority_browse/db/names.rb @@ -4,19 +4,17 @@ class Names < AuthorityBrowse::DB def self.database_definitions { names: proc do - String :id, primary_key: true + String :id String :label, text: true - String :match_text, text: true, index: true - Boolean :deprecated, default: false, index: true - Integer :count, default: 0, index: true + String :match_text, text: true + Boolean :deprecated, default: false + Integer :count, default: 0 end, names_see_also: proc do - primary_key :id - String :name_id, index: true - String :see_also_id, index: true + String :name_id + String :see_also_id end, names_from_biblio: proc do - primary_key :id String :term, text: true String :match_text, text: true, index: true Integer :count, default: 0 @@ -24,6 +22,20 @@ def self.database_definitions end } end + + def self.set_names_indexes! 
+ AuthorityBrowse.db.alter_table(:names) do + add_index :id + add_index :match_text + add_index :deprecated + add_index :count + end + + AuthorityBrowse.db.alter_table(:names_see_also) do + add_index :name_id + add_index :see_also_id + end + end end end end diff --git a/lib/authority_browse/names.rb b/lib/authority_browse/names.rb index 20a9f71a..bb1900df 100644 --- a/lib/authority_browse/names.rb +++ b/lib/authority_browse/names.rb @@ -16,10 +16,9 @@ def reset_db(loc_file_getter = lambda { fetch_skos_file }) DB::Names.recreate_table!(:names) DB::Names.recreate_table!(:names_see_also) - milemarker = Milemarker.new(batch_size: 100_000, name: "adding to entries array", logger: Services.logger) - milemarker.log "Starting adding to entries array" + milemarker = Milemarker.new(batch_size: 100_000, name: "add names to db", logger: Services.logger) + milemarker.log "Start adding names to db" Zinzout.zin(skos_file).each_slice(100_000) do |slice| - # Zinzout.zin("./data/smaller.jsonld.gz").each_slice(100_000) do |slice| entries = slice.map do |line| AuthorityBrowse::LocAuthorities::Entry.new(JSON.parse(line)) end @@ -44,16 +43,33 @@ def reset_db(loc_file_getter = lambda { fetch_skos_file }) milemarker.log_final_line - DBMutator::Names.remove_deprecated_when_undeprecated_match_text_exists + S.logger.info "Start: set the indexes" + S.logger.measure_info("set the indexes") do + AuthorityBrowse::DB::Names.set_names_indexes! 
+ end + S.logger.info "Start: remove deprecated when undeprecated match text exists" + S.logger.measure_info("removed deprecated terms with undprecated match text") do + DBMutator::Names.remove_deprecated_when_undeprecated_match_text_exists + end end # Fetches terms from Biblio, updates counts in :names, and adds loc ids to # :names_from_biblio def update + S.logger.info "Start Term fetcher" TermFetcher.new.run - DBMutator::Names.zero_out_counts - DBMutator::Names.update_names_with_counts - DBMutator::Names.add_ids_to_names_from_biblio + S.logger.info "Start: zeroing out counts" + S.logger.measure_info("Zeroed out counts") do + DBMutator::Names.zero_out_counts + end + S.logger.info "Start: update names with counts" + S.logger.measure_info("updated names with counts") do + DBMutator::Names.update_names_with_counts + end + S.logger.info "Start: add ids to names_from_biblio" + S.logger.measure_info("Updated ids in names_from_biblio") do + DBMutator::Names.add_ids_to_names_from_biblio + end end # Loads solr with documents of names that match data from library of diff --git a/spec/authority_browse/db/names_spec.rb b/spec/authority_browse/db/names_spec.rb index 2d954a5a..d9c69252 100644 --- a/spec/authority_browse/db/names_spec.rb +++ b/spec/authority_browse/db/names_spec.rb @@ -43,4 +43,13 @@ expect(AuthorityBrowse.db[:names_from_biblio].count).to eq(0) end end + context ".set_names_indexes!" do + it "sets the indexes on names and names_see_also" do + expect(AuthorityBrowse.db.indexes(:names)).to eq({}) + expect(AuthorityBrowse.db.indexes(:names_see_also)).to eq({}) + subject.set_names_indexes! + expect(AuthorityBrowse.db.indexes(:names)).not_to eq({}) + expect(AuthorityBrowse.db.indexes(:names_see_also)).not_to eq({}) + end + end end