Skip to content

Commit

Permalink
Merge pull request #269 from pulibrary/268-solr-no-title
Browse files Browse the repository at this point in the history
Handle transformation and solr indexing errors
  • Loading branch information
tpendragon authored Nov 26, 2024
2 parents f157ac1 + 8a0d5cf commit 3bfacc1
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do

%{
id: id,
title_txtm: get_in(metadata, ["title"]),
title_txtm: extract_title(metadata),
description_txtm: get_in(metadata, ["description"]),
years_is: extract_years(data),
display_date_s: format_date(metadata),
Expand All @@ -49,6 +49,8 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do
extract_service_url(member_data[id])
end

defp extract_service_url(_id, _), do: nil

# Find the derivative FileMetadata
defp extract_service_url(%{
"internal_resource" => "FileSet",
Expand Down Expand Up @@ -82,7 +84,11 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do

defp extract_service_url(nil), do: nil

defp extract_service_url(_id, _), do: nil
def extract_title(%{"title" => []}) do
["[Missing Title]"]
end

def extract_title(%{"title" => title}), do: title

defp is_derivative(%{
"mime_type" => ["image/tiff"],
Expand Down
38 changes: 32 additions & 6 deletions lib/dpul_collections/solr.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
defmodule DpulCollections.Solr do
require Logger

@spec document_count(String.t()) :: integer()
def document_count(collection \\ read_collection()) do
{:ok, response} =
Expand Down Expand Up @@ -95,12 +97,36 @@ defmodule DpulCollections.Solr do
end
end

@spec add(list(map()), String.t()) :: {:ok, Req.Response.t()} | {:error, Exception.t()}
def add(docs, collection \\ read_collection()) do
Req.post(
update_url(collection),
json: docs
)
@spec add(list(map()) | String.t(), String.t()) ::
{:ok, Req.Response.t()} | {:error, Exception.t()}
def add(docs, collection \\ read_collection())

def add(docs, collection) when is_list(docs) do
response =
Req.post!(
update_url(collection),
json: docs
)

if response.status != 200 do
docs |> Enum.each(&add/1)
end

response
end

def add(doc, collection) when not is_list(doc) do
response =
Req.post!(
update_url(collection),
json: [doc]
)

if response.status != 200 do
Logger.warning("error indexing solr document with id: #{doc["id"]} #{response.body}")
end

response
end

@spec commit(String.t()) :: {:ok, Req.Response.t()} | {:error, Exception.t()}
Expand Down
2 changes: 1 addition & 1 deletion test/dpul_collections/indexing_pipeline/coherence_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ defmodule DpulCollections.IndexingPipeline.CoherenceTest do
end

test "index_parity?/0 is true when the new index and the old index have equal freshness" do
{marker1, marker2, _marker3} = FiggyTestFixtures.hydration_cache_markers(1)
{marker1, _marker2, _marker3} = FiggyTestFixtures.hydration_cache_markers(1)
FiggyTestFixtures.hydration_cache_markers(2)

IndexingPipeline.write_processor_marker(%{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,5 +282,24 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntryTest do
assert capture_log(fn -> HydrationCacheEntry.to_solr_document(entry) end) =~
"couldn't parse date"
end

test "an empty solr document is returned with a empty title field" do
{:ok, entry} =
IndexingPipeline.write_hydration_cache_entry(%{
cache_version: 0,
record_id: "f134f41f-63c5-4fdf-b801-0774e3bc3b2d",
source_cache_order: ~U[2018-03-09 20:19:36.465203Z],
data: %{
"id" => "f134f41f-63c5-4fdf-b801-0774e3bc3b2d",
"internal_resource" => "EphemeraFolder",
"metadata" => %{
"title" => [],
"date_created" => ["2022"]
}
}
})

assert %{title_txtm: ["[Missing Title]"]} = HydrationCacheEntry.to_solr_document(entry)
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,12 @@ defmodule DpulCollections.IndexingPipeline.Figgy.IndexingIntegrationTest do
{marker1, _marker2, _marker3} = FiggyTestFixtures.transformation_cache_markers()

Solr.add(
%{
"id" => marker1.id,
"title" => ["old title"]
},
[
%{
"id" => marker1.id,
"title_txtm" => ["old title"]
}
],
active_collection()
)

Expand Down
2 changes: 1 addition & 1 deletion test/dpul_collections/indexing_pipeline_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ defmodule DpulCollections.IndexingPipelineTest do

test "delete_cache_version/1 deletes entries for that cache version from each table" do
for cache_version <- [0, 1] do
{marker1, _marker2, _marker3} = FiggyTestFixtures.hydration_cache_markers(cache_version)
FiggyTestFixtures.hydration_cache_markers(cache_version)

{marker1, _marker2, _marker3} =
FiggyTestFixtures.transformation_cache_markers(cache_version)
Expand Down
29 changes: 29 additions & 0 deletions test/dpul_collections/solr_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ defmodule DpulCollections.SolrTest do
use DpulCollections.DataCase
alias DpulCollections.Solr
import SolrTestSupport
import ExUnit.CaptureLog

setup do
Solr.delete_all(active_collection())
Expand Down Expand Up @@ -201,4 +202,32 @@ defmodule DpulCollections.SolrTest do
assert Solr.find_by_id("3cb7627b-defc-401b-9959-42ebc4488f74")["slug_s"] ==
"él-no-responde-mis-mensajes"
end

test "an exception is logged when indexing a document raises a solr error" do
doc = %{
# No title
"id" => "3cb7627b-defc-401b-9959-42ebc4488f74"
}

assert capture_log(fn -> Solr.add([doc], active_collection()) end) =~
"error indexing solr document"
end

test "a valid solr document is indexed when in the same batch as an invalid solr document" do
valid_doc = %{
"id" => "e0602353-4429-4405-b080-064952f9b267",
"title_txtm" => ["test title 1"]
}

invalid_doc = %{
# No title
"id" => "3cb7627b-defc-401b-9959-42ebc4488f74"
}

assert capture_log(fn -> Solr.add([valid_doc, invalid_doc], active_collection()) end) =~
"error indexing solr document"

Solr.commit(active_collection())
assert Solr.find_by_id(valid_doc["id"])["id"] == valid_doc["id"]
end
end

0 comments on commit 3bfacc1

Please sign in to comment.