diff --git a/Gemfile.lock b/Gemfile.lock index c53e706..b1a4f6b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -312,6 +312,7 @@ PLATFORMS arm64-darwin-22 arm64-darwin-23 x86_64-darwin-22 + x86_64-darwin-23 x86_64-linux DEPENDENCIES diff --git a/app/models/art_museum_document.rb b/app/models/art_museum_document.rb index 87bad59..fd96501 100644 --- a/app/models/art_museum_document.rb +++ b/app/models/art_museum_document.rb @@ -61,4 +61,8 @@ def object_number def date document.dig(:_source, :displaydate) end + + def do_not_sanitize_these_fields + super + [:primary_image] + end end diff --git a/app/models/article_document.rb b/app/models/article_document.rb index 5b605ab..9bd6683 100644 --- a/app/models/article_document.rb +++ b/app/models/article_document.rb @@ -63,6 +63,6 @@ def publication_date end def publication_year - document.publication_date.year + document.publication_date.year.to_s end end diff --git a/app/models/catalog_document.rb b/app/models/catalog_document.rb index b10e8e5..f314c09 100644 --- a/app/models/catalog_document.rb +++ b/app/models/catalog_document.rb @@ -66,4 +66,8 @@ def electronic_access electronic_access_string.present? ? JSON.parse(electronic_access_string) : {} end end + + def do_not_sanitize_these_fields + super + [:resource_url] + end end diff --git a/app/models/document.rb b/app/models/document.rb index 3efd663..f473993 100644 --- a/app/models/document.rb +++ b/app/models/document.rb @@ -12,7 +12,7 @@ def initialize(document:, doc_keys:) def to_h doc_hash = {} @doc_keys.each do |key| - val = send(key) + val = get_value key doc_hash[key] = val if val end doc_hash @@ -31,10 +31,23 @@ def sanitize(text) end def other_fields - doc_keys&.index_with { |key| send(key) } + doc_keys&.index_with { |key| get_value key } &.compact &.transform_values(&:to_s) end + def get_value(key) + value = send key + sanitize_field?(key) ? sanitize(value) : value + end + + def sanitize_field?(key) + do_not_sanitize_these_fields.exclude? key + end + + def do_not_sanitize_these_fields + [:url, :other_fields] + end + attr_reader :document end diff --git a/app/models/library_staff_document.rb b/app/models/library_staff_document.rb index aa73a2c..3cd3c6c 100644 --- a/app/models/library_staff_document.rb +++ b/app/models/library_staff_document.rb @@ -50,8 +50,8 @@ def library_title document.library_title end - def section - document.section + def team + document.team end def division @@ -89,7 +89,7 @@ def url end def doc_keys - [:first_name, :middle_name, :last_name, :netid, :library_title, :phone, :email, :section, :division, :department, + [:first_name, :middle_name, :last_name, :netid, :library_title, :phone, :email, :team, :division, :department, :unit, :office, :building] end end diff --git a/app/models/library_staff_record.rb b/app/models/library_staff_record.rb index ed82622..20f4b73 100644 --- a/app/models/library_staff_record.rb +++ b/app/models/library_staff_record.rb @@ -34,6 +34,9 @@ def self.new_from_csv(row) record.building = row[8] record.department = row[9] record.unit = row[11] + record.areas_of_study = row[14]&.gsub('//', ', ') + record.my_scheduler_link = row[18] + record.other_entities = row[19]&.gsub('//', ', ') record.library_title = title record.title = title record.save! if record.valid? diff --git a/app/models/sanitizer.rb b/app/models/sanitizer.rb index e46a3a5..348d36f 100644 --- a/app/models/sanitizer.rb +++ b/app/models/sanitizer.rb @@ -1,13 +1,14 @@ # frozen_string_literal: true require 'rails-html-sanitizer' +require 'cgi' class Sanitizer < Rails::HTML5::SafeListSanitizer def sanitize(html, options = {}) # Add spaces before opening HTML tags, so that words don't run together # after the tags are removed - with_spaces = html.gsub(/(\S)(<\w)/, '\1 \2') - sanitized = super(with_spaces, options) - sanitized.gsub(' ', ' ').strip + with_spaces = html.to_s.gsub(/(\S)(<\w)/, '\1 \2') + sanitized = super(with_spaces, options).gsub(' ', ' ').strip + CGI.unescapeHTML sanitized end end diff --git a/app/services/library_staff_loading_service.rb b/app/services/library_staff_loading_service.rb index e9e9f36..c6f3b50 100644 --- a/app/services/library_staff_loading_service.rb +++ b/app/services/library_staff_loading_service.rb @@ -12,7 +12,7 @@ def class_to_load def expected_headers %w[puid netid phone name lastName firstName email address building department division - unit team title areasOfStudy websiteUrl bios expertise mySchedulerLink] + unit team title areasOfStudy websiteUrl bios expertise mySchedulerLink otherEntities] end def uri diff --git a/db/migrate/20240716210532_add_staff_records_fields.rb b/db/migrate/20240716210532_add_staff_records_fields.rb new file mode 100644 index 0000000..79d6ada --- /dev/null +++ b/db/migrate/20240716210532_add_staff_records_fields.rb @@ -0,0 +1,20 @@ +class AddStaffRecordsFields < ActiveRecord::Migration[7.1] + + def up + change_table :library_staff_records do |t| + t.column :areas_of_study, :string + t.column :other_entities, :string + t.column :my_scheduler_link, :string + t.rename :section, :team + end + end + def down + change_table :library_staff_records do |t| + t.remove :areas_of_study + t.remove :other_entities + t.remove :my_scheduler_link + t.rename :team, :section + end + end + +end diff --git a/db/migrate/20240716214838_update_searchable_library_staff_records.rb b/db/migrate/20240716214838_update_searchable_library_staff_records.rb new file mode 100644 index 0000000..0810519 --- /dev/null +++ b/db/migrate/20240716214838_update_searchable_library_staff_records.rb @@ -0,0 +1,44 @@ +class UpdateSearchableLibraryStaffRecords < ActiveRecord::Migration[7.1] + def up + change_table :library_staff_records do |t| + t.remove :searchable + t.virtual :searchable, type: :tsvector, + as: "to_tsvector('english', coalesce(title, '') || ' ' || " \ + "coalesce(first_name, '') || ' ' || " \ + "coalesce(middle_name, '') || ' ' || " \ + "coalesce(last_name, '') || ' ' || " \ + "coalesce(title, '') || ' ' || " \ + "coalesce(email, '') || ' ' || " \ + "coalesce(department, '') || ' ' || " \ + "coalesce(office, '') || ' ' || " \ + "coalesce(building, '') || ' ' || " \ + "coalesce(team, '') || ' ' || " \ + "coalesce(division, '') || ' ' || " \ + "coalesce(unit, '') || ' ' ||" \ + "coalesce(areas_of_study, '') || ' ' || " \ + "coalesce(other_entities, ''))", + stored: true + t.index ["searchable"], name: "staff_search_idx", using: :gin + end + end + + def down + change_table :library_staff_records do |t| + t.remove :searchable + t.virtual :searchable, type: :tsvector, + as: "to_tsvector('english', coalesce(title, '') || ' ' || " \ + "coalesce(first_name, '') || ' ' || " \ + "coalesce(middle_name, '') || ' ' || " \ + "coalesce(last_name, '') || ' ' || " \ + "coalesce(title, '') || ' ' || " \ + "coalesce(email, '') || ' ' || " \ + "coalesce(department, '') || ' ' || " \ + "coalesce(office, '') || ' ' || " \ + "coalesce(building, '') || ' ' || " \ + "coalesce(team, '') || ' ' || " \ + "coalesce(division, '') || ' ' || " \ + "coalesce(unit, ''))" , + stored: true + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 22c37f8..6e2024d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -77,7 +77,7 @@ t.string "title", null: false t.string "library_title", null: false t.string "email", null: false - t.string "section" + t.string "team" t.string "division" t.string "department" t.string "unit" @@ -85,7 +85,10 @@ t.string "building" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.virtual "searchable", type: :tsvector, as: "to_tsvector('english'::regconfig, (((((((((((((((((((((((COALESCE(title, ''::character varying))::text || ' '::text) || (COALESCE(first_name, ''::character varying))::text) || ' '::text) || (COALESCE(middle_name, ''::character varying))::text) || ' '::text) || (COALESCE(last_name, ''::character varying))::text) || ' '::text) || (COALESCE(title, ''::character varying))::text) || ' '::text) || (COALESCE(email, ''::character varying))::text) || ' '::text) || (COALESCE(department, ''::character varying))::text) || ' '::text) || (COALESCE(office, ''::character varying))::text) || ' '::text) || (COALESCE(building, ''::character varying))::text) || ' '::text) || (COALESCE(section, ''::character varying))::text) || ' '::text) || (COALESCE(division, ''::character varying))::text) || ' '::text) || (COALESCE(unit, ''::character varying))::text))", stored: true + t.string "areas_of_study" + t.string "other_entities" + t.string "my_scheduler_link" + t.virtual "searchable", type: :tsvector, as: "to_tsvector('english'::regconfig, (((((((((((((((((((((((((((COALESCE(title, ''::character varying))::text || ' '::text) || (COALESCE(first_name, ''::character varying))::text) || ' '::text) || (COALESCE(middle_name, ''::character varying))::text) || ' '::text) || (COALESCE(last_name, ''::character varying))::text) || ' '::text) || (COALESCE(title, ''::character varying))::text) || ' '::text) || (COALESCE(email, ''::character varying))::text) || ' '::text) || (COALESCE(department, ''::character varying))::text) || ' '::text) || (COALESCE(office, ''::character varying))::text) || ' '::text) || (COALESCE(building, ''::character varying))::text) || ' '::text) || (COALESCE(team, ''::character varying))::text) || ' '::text) || (COALESCE(division, ''::character varying))::text) || ' '::text) || (COALESCE(unit, ''::character varying))::text) || ' '::text) || (COALESCE(areas_of_study, ''::character varying))::text) || ' '::text) || (COALESCE(other_entities, ''::character varying))::text))", stored: true t.index ["searchable"], name: "staff_search_idx", using: :gin end diff --git a/spec/fixtures/files/art_museum/cats.json b/spec/fixtures/files/art_museum/cats.json index ed94830..0830d9f 100644 --- a/spec/fixtures/files/art_museum/cats.json +++ b/spec/fixtures/files/art_museum/cats.json @@ -21,7 +21,7 @@ "creditline": "Bequest of Dan Fellows Platt, Class of 1895", "displaydate": "1900", "primaryimage": [ - "https://puam-loris.aws.princeton.edu/loris/INV34694.jp2" + "https://puam-loris.aws.princeton.edu/loris/INV34694.jp2?a=b&c=d" ], "medium": "Graphite", "displaytitle": "Two cats", diff --git a/spec/fixtures/files/library_staff/staff-directory-blank-lines.csv b/spec/fixtures/files/library_staff/staff-directory-blank-lines.csv index 9aa84c6..e755390 100644 --- a/spec/fixtures/files/library_staff/staff-directory-blank-lines.csv +++ b/spec/fixtures/files/library_staff/staff-directory-blank-lines.csv @@ -1,4 +1,4 @@ -puid,netid,phone,name,lastName,firstName,email,address,building,department,division,unit,team,title,areasOfStudy,websiteUrl,bios,expertise,mySchedulerLink -,,,,,,,,,,,,,,,,,, -"000000001","lucyfs","(555) 123-1234","Stardust, Lucy","Stardust","Lucy Fae",lucyfs@princeton.edu,Forrestal,Recap Library,Office of the Deputy Dean of Libraries,Facilities,,,"Pest Removal Specialist",,,,, -,,,,,,,,,,,,,,,,,, +puid,netid,phone,name,lastName,firstName,email,address,building,department,division,unit,team,title,areasOfStudy,websiteUrl,bios,expertise,mySchedulerLink,otherEntities +,,,,,,,,,,,,,,,,,,, +"000000001","lucyfs","(555) 123-1234","Stardust, Lucy","Stardust","Lucy Fae",lucyfs@princeton.edu,Forrestal,Recap Library,Office of the Deputy Dean of Libraries,Facilities,,,"Pest Removal Specialist",,,,,, +,,,,,,,,,,,,,,,,,,, diff --git a/spec/fixtures/files/library_staff/staff-directory.csv b/spec/fixtures/files/library_staff/staff-directory.csv index b273fab..5baec64 100644 --- a/spec/fixtures/files/library_staff/staff-directory.csv +++ b/spec/fixtures/files/library_staff/staff-directory.csv @@ -1,5 +1,5 @@ -"puid","netid","phone","name","lastName","firstName",email,address,building,department,division,unit,team,"title","areasOfStudy","websiteUrl","bios","expertise","mySchedulerLink" -"000000001","lucyfs","(555) 123-1234","Stardust, Lucy","Stardust","Lucy Fae",lucyfs@princeton.edu,Forrestal,Recap Library,Office of the Deputy Dean of Libraries,Facilities,,,"Pest Removal Specialist",,,,, -"000000002","nimbuskt","(555) 111-1111","Trout, Nimbus","Trout","Nimbus Kilgore",nibmus@princeton.edu,A-200,Firestone Library,Office of the Deputy Dean of Libraries,Information Technology,IT Operations and Digitization,,"Nap Coordinator","Naps//Coordination",,,, -"000000003","tiberius","(555) 222-2222","Adams, Tiberius","Adams","Spot Tiberius",tiberius@princeton.edu,B-300,Firestone Library,My Department,Library - Collections and Access Services,Access & Fulfillment Services,,"Lead Hairball Engineer",,,,, -"000000010","brutus","(555) 222-2222","Cat, Brutus","Cat","Brutus The",brutus@princeton.edu,B-300,Stokes Library,My Department,Library - Collections and Access Services,PCRP - Physical Collections Receipt & Processing Unit,,"Fluffiest cat",,,,, +"puid","netid","phone","name","lastName","firstName",email,address,building,department,division,unit,team,"title","areasOfStudy","websiteUrl","bios","expertise","mySchedulerLink","otherEntities" +"000000001","lucyfs","(555) 123-1234","Stardust, Lucy","Stardust","Lucy Fae",lucyfs@princeton.edu,Forrestal,Recap Library,Office of the Deputy Dean of Libraries,Facilities,,,"Pest Removal Specialist",,,,,,"MS Chadha Center for Global India" +"000000002","nimbuskt","(555) 111-1111","Trout, Nimbus","Trout","Nimbus Kilgore",nibmus@princeton.edu,A-200,Firestone Library,Office of the Deputy Dean of Libraries,Information Technology,IT Operations and Digitization,,"Nap Coordinator","Naps//Coordination",,,,,"Center for Culture, Society and Religion//University Center for Human Values" +"000000003","tiberius","(555) 222-2222","Adams, Tiberius","Adams","Spot Tiberius",tiberius@princeton.edu,B-300,Firestone Library,My Department,Library - Collections and Access Services,Access & Fulfillment Services,,"Lead Hairball Engineer",,,,,,"Center for International Security Studies (CISS)//Center for the Study of Democratic Politics (CSDP)//Empirical Studies of Conflict (ESOC)//Innovations for Successful Societies (ISS)//Liechtenstein Institute on Self-Determination (LISD)//Niehaus Center for lization and Governance (NCGG)//Princeton Survey Research Center (SRC)//Research Program in Political Economy (RPPE)" +"000000010","brutus","(555) 222-2222","Cat, Brutus","Cat","Brutus The",brutus@princeton.edu,B-300,Stokes Library,My Department,Library - Collections and Access Services,PCRP - Physical Collections Receipt & Processing Unit,,"Fluffiest cat",,,,,, diff --git a/spec/models/article_document_spec.rb b/spec/models/article_document_spec.rb index 13db42d..1b801ae 100644 --- a/spec/models/article_document_spec.rb +++ b/spec/models/article_document_spec.rb @@ -22,7 +22,7 @@ expect(article_document.description).to include('In 1994, the Government of Cameroon') expect(article_document.url).to include('princeton.summon.serialssolutions.com/2.0.0/link/0/') expect(article_document.publication_date).to eq('2009') - expect(article_document.publication_year).to eq(2009) + expect(article_document.publication_year).to eq('2009') expect(article_document.fulltext_available).to be('Full-text available') expect(article_document.abstract).to include('In 1994, the Government of Cameroon') expect(article_document.isxn).to eq('9780821378786') @@ -39,7 +39,7 @@ expect(article_document.title).to eq('Potato') expect(article_document.publication_title).to eq('Plants (Basel)') expect(article_document.publication_date).to eq('20221001') - expect(article_document.publication_year).to eq(2022) + expect(article_document.publication_year).to eq('2022') expect(article_document.volume).to eq('11') expect(article_document.issue).to eq('20') expect(article_document.type).to eq('Journal Article') diff --git a/spec/models/library_staff_record_spec.rb b/spec/models/library_staff_record_spec.rb index 3d0e9b2..c6f2ac2 100644 --- a/spec/models/library_staff_record_spec.rb +++ b/spec/models/library_staff_record_spec.rb @@ -30,5 +30,11 @@ expect(results[0].first_name).to eq('Nimbus Kilgore') expect(results[1].first_name).to eq('Spot Tiberius') end + + it 'finds records by other entities' do + results = described_class.query('Center for Global') + expect(results.length).to eq(1) + expect(results[0].other_entities).to eq('MS Chadha Center for Global India') + end end end diff --git a/spec/models/sanitizer_spec.rb b/spec/models/sanitizer_spec.rb index e4ad5b4..d19c12a 100644 --- a/spec/models/sanitizer_spec.rb +++ b/spec/models/sanitizer_spec.rb @@ -40,5 +40,11 @@ expect(sanitizer.sanitize(test_string, scrubber: TextScrubber.new)).to eq(expected) end + + it 'repeats the & character verbatim' do + test_string = 'Cataloging & Classification Quarterly' + + expect(sanitizer.sanitize(test_string, scrubber: TextScrubber.new)).to eq(test_string) + end end end diff --git a/spec/requests/art_museum_spec.rb b/spec/requests/art_museum_spec.rb index fe58087..70ddf1a 100644 --- a/spec/requests/art_museum_spec.rb +++ b/spec/requests/art_museum_spec.rb @@ -29,7 +29,7 @@ credit_line: 'Bequest of Dan Fellows Platt, Class of 1895', medium: 'Graphite', dimensions: '25.1 × 26.2 cm. (9 7/8 × 10 5/16 in.)', - primary_image: 'https://puam-loris.aws.princeton.edu/loris/INV34694.jp2', + primary_image: 'https://puam-loris.aws.princeton.edu/loris/INV34694.jp2?a=b&c=d', object_number: 'x1948-1210', date: '1900' } } diff --git a/spec/requests/library_database_spec.rb b/spec/requests/library_database_spec.rb index 509418b..abc6995 100644 --- a/spec/requests/library_database_spec.rb +++ b/spec/requests/library_database_spec.rb @@ -10,7 +10,7 @@ number: 3, records: [ title: 'Oxford Music Online', - id: 2_939_886, + id: '2939886', type: 'Database', description: 'Biographical articles for composers, performers, librettists, conductors and others. ' \ 'Includes entries from Grove dictionaries of jazz and opera as well.', @@ -57,7 +57,6 @@ it 'matches the expected first record' do get '/search/database?query=oxford music' response_body = JSON.parse(response.body, symbolize_names: true) - expect(response_body[:records][0].keys).to match_array(expected_record_keys) expected_record_keys.each do |key| expect(response_body[:records][0][key]).to match(expected_response[:records].first[key]) diff --git a/spec/requests/library_staff_spec.rb b/spec/requests/library_staff_spec.rb index 452c148..54b3ea0 100644 --- a/spec/requests/library_staff_spec.rb +++ b/spec/requests/library_staff_spec.rb @@ -10,7 +10,7 @@ number: 3, records: [ title: 'Trout, Nimbus', - id: 00_0000_002, + id: '2', type: 'Library Staff', url: 'https://library.psb-prod.princeton.edu/people/nimbus-kilgore-trout', other_fields: { diff --git a/spec/requests/pulmap_spec.rb b/spec/requests/pulmap_spec.rb index a4a6cdc..1368e4c 100644 --- a/spec/requests/pulmap_spec.rb +++ b/spec/requests/pulmap_spec.rb @@ -25,7 +25,7 @@ records: [ { title: 'South America : wall-atlas', creator: 'Guyot, A. (Arnold), 1807-1884', - description: "\"Card series.\" Relief shown by hachures and form lines. \"Entered according to Act of Congress in the year 1865 by Charles Scribner & Co. ...\" Inset: Profiles from west to east. Wall map. Scribner, Armstrong, & Co. flourished ca. 1871-1879. cf. Tooley's dictionary of mapmakers.", + description: "\"Card series.\" Relief shown by hachures and form lines. \"Entered according to Act of Congress in the year 1865 by Charles Scribner & Co. ...\" Inset: Profiles from west to east. Wall map. Scribner, Armstrong, & Co. flourished ca. 1871-1879. cf. Tooley's dictionary of mapmakers.", publisher: 'New York : Published by Scribner, Armstrong & Co. ... [between 1871 and 1879].', id: 'princeton-6682x6396', type: 'TIFF',