Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle transformation and solr indexing errors #269

Merged
merged 4 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do

%{
id: id,
title_txtm: get_in(metadata, ["title"]),
title_txtm: extract_title(metadata),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that we're starting to get to a pattern here.

description_txtm: get_in(metadata, ["description"]),
years_is: extract_years(data),
display_date_s: format_date(metadata),
Expand All @@ -49,6 +49,8 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do
extract_service_url(member_data[id])
end

defp extract_service_url(_id, _), do: nil

# Find the derivative FileMetadata
defp extract_service_url(%{
"internal_resource" => "FileSet",
Expand Down Expand Up @@ -82,7 +84,11 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do

defp extract_service_url(nil), do: nil

defp extract_service_url(_id, _), do: nil
def extract_title(%{"title" => []}) do
["[Missing Title]"]
end

def extract_title(%{"title" => title}), do: title

defp is_derivative(%{
"mime_type" => ["image/tiff"],
Expand Down
38 changes: 32 additions & 6 deletions lib/dpul_collections/solr.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
defmodule DpulCollections.Solr do
require Logger

@spec document_count(String.t()) :: integer()
def document_count(collection \\ read_collection()) do
{:ok, response} =
Expand Down Expand Up @@ -95,12 +97,36 @@ defmodule DpulCollections.Solr do
end
end

@spec add(list(map()), String.t()) :: {:ok, Req.Response.t()} | {:error, Exception.t()}
def add(docs, collection \\ read_collection()) do
Req.post(
update_url(collection),
json: docs
)
@spec add(list(map()) | String.t(), String.t()) ::
{:ok, Req.Response.t()} | {:error, Exception.t()}
def add(docs, collection \\ read_collection())

def add(docs, collection) when is_list(docs) do
response =
Req.post!(
update_url(collection),
json: docs
)

if response.status != 200 do
docs |> Enum.each(&add/1)
end

response
end

def add(doc, collection) when not is_list(doc) do
response =
Req.post!(
update_url(collection),
json: [doc]
)

if response.status != 200 do
Logger.warning("error indexing solr document with id: #{doc["id"]} #{response.body}")
end

response
end

@spec commit(String.t()) :: {:ok, Req.Response.t()} | {:error, Exception.t()}
Expand Down
2 changes: 1 addition & 1 deletion test/dpul_collections/indexing_pipeline/coherence_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ defmodule DpulCollections.IndexingPipeline.CoherenceTest do
end

test "index_parity?/0 is true when the new index and the old index have equal freshness" do
{marker1, marker2, _marker3} = FiggyTestFixtures.hydration_cache_markers(1)
{marker1, _marker2, _marker3} = FiggyTestFixtures.hydration_cache_markers(1)
FiggyTestFixtures.hydration_cache_markers(2)

IndexingPipeline.write_processor_marker(%{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,5 +282,24 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntryTest do
assert capture_log(fn -> HydrationCacheEntry.to_solr_document(entry) end) =~
"couldn't parse date"
end

test "an empty solr document is returned with a empty title field" do
{:ok, entry} =
IndexingPipeline.write_hydration_cache_entry(%{
cache_version: 0,
record_id: "f134f41f-63c5-4fdf-b801-0774e3bc3b2d",
source_cache_order: ~U[2018-03-09 20:19:36.465203Z],
data: %{
"id" => "f134f41f-63c5-4fdf-b801-0774e3bc3b2d",
"internal_resource" => "EphemeraFolder",
"metadata" => %{
"title" => [],
"date_created" => ["2022"]
}
}
})

assert %{title_txtm: ["[Missing Title]"]} = HydrationCacheEntry.to_solr_document(entry)
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,12 @@ defmodule DpulCollections.IndexingPipeline.Figgy.IndexingIntegrationTest do
{marker1, _marker2, _marker3} = FiggyTestFixtures.transformation_cache_markers()

Solr.add(
%{
"id" => marker1.id,
"title" => ["old title"]
},
[
%{
"id" => marker1.id,
"title_txtm" => ["old title"]
}
],
active_collection()
)

Expand Down
2 changes: 1 addition & 1 deletion test/dpul_collections/indexing_pipeline_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ defmodule DpulCollections.IndexingPipelineTest do

test "delete_cache_version/1 deletes entries for that cache version from each table" do
for cache_version <- [0, 1] do
{marker1, _marker2, _marker3} = FiggyTestFixtures.hydration_cache_markers(cache_version)
FiggyTestFixtures.hydration_cache_markers(cache_version)

{marker1, _marker2, _marker3} =
FiggyTestFixtures.transformation_cache_markers(cache_version)
Expand Down
29 changes: 29 additions & 0 deletions test/dpul_collections/solr_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ defmodule DpulCollections.SolrTest do
use DpulCollections.DataCase
alias DpulCollections.Solr
import SolrTestSupport
import ExUnit.CaptureLog

setup do
Solr.delete_all(active_collection())
Expand Down Expand Up @@ -201,4 +202,32 @@ defmodule DpulCollections.SolrTest do
assert Solr.find_by_id("3cb7627b-defc-401b-9959-42ebc4488f74")["slug_s"] ==
"él-no-responde-mis-mensajes"
end

test "an exception is logged when indexing a document raises a solr error" do
doc = %{
# No title
"id" => "3cb7627b-defc-401b-9959-42ebc4488f74"
}

assert capture_log(fn -> Solr.add([doc], active_collection()) end) =~
"error indexing solr document"
end

test "a valid solr document is indexed when in the same batch as an invalid solr document" do
valid_doc = %{
"id" => "e0602353-4429-4405-b080-064952f9b267",
"title_txtm" => ["test title 1"]
}

invalid_doc = %{
# No title
"id" => "3cb7627b-defc-401b-9959-42ebc4488f74"
}

assert capture_log(fn -> Solr.add([valid_doc, invalid_doc], active_collection()) end) =~
"error indexing solr document"

Solr.commit(active_collection())
assert Solr.find_by_id(valid_doc["id"])["id"] == valid_doc["id"]
end
end