From f28c8cabc797e01585704e8c5ee077e7da2ce7a6 Mon Sep 17 00:00:00 2001 From: Eliot Jordan Date: Mon, 25 Nov 2024 12:34:05 -0600 Subject: [PATCH] Assign default when title is missing --- .../figgy/hydration_cache_entry.ex | 10 ++++++++-- lib/dpul_collections/solr.ex | 8 +++++++- .../figgy/hydration_cache_entry_test.exs | 19 +++++++++++++++++++ .../figgy/indexing_integration_test.exs | 16 ++++++++++++++++ test/dpul_collections/solr_test.exs | 2 -- 5 files changed, 50 insertions(+), 5 deletions(-) diff --git a/lib/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry.ex b/lib/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry.ex index 8628c88e..6b211663 100644 --- a/lib/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry.ex +++ b/lib/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry.ex @@ -27,7 +27,7 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do %{ id: id, - title_txtm: get_in(metadata, ["title"]), + title_txtm: extract_title(metadata), description_txtm: get_in(metadata, ["description"]), years_is: extract_years(data), display_date_s: format_date(metadata), @@ -49,6 +49,8 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do extract_service_url(member_data[id]) end + defp extract_service_url(_id, _), do: nil + # Find the derivative FileMetadata defp extract_service_url(%{ "internal_resource" => "FileSet", @@ -82,7 +84,11 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do defp extract_service_url(nil), do: nil - defp extract_service_url(_id, _), do: nil + def extract_title(%{"title" => []}) do + ["[Missing Title]"] + end + + def extract_title(%{"title" => title}), do: title defp is_derivative(%{ "mime_type" => ["image/tiff"], diff --git a/lib/dpul_collections/solr.ex b/lib/dpul_collections/solr.ex index 6ad1695c..33f4f333 100644 --- a/lib/dpul_collections/solr.ex +++ b/lib/dpul_collections/solr.ex @@ -98,7 +98,9 @@ defmodule DpulCollections.Solr do end @spec add(list(map()), String.t()) :: {:ok, Req.Response.t()} | {:error, Exception.t()} - def add(docs, collection \\ read_collection()) when length(docs) > 1 do + def add(docs, collection \\ read_collection()) + + def add(docs, collection) when length(docs) > 1 do response = Req.post!( update_url(collection), @@ -108,6 +110,8 @@ defmodule DpulCollections.Solr do if response.status != 200 do Enum.each(docs, fn doc -> add([doc]) end) end + + response end def add(docs, collection) when length(docs) when length(docs) == 1 do @@ -121,6 +125,8 @@ defmodule DpulCollections.Solr do doc = docs |> Enum.at(0) Logger.warning("error indexing solr document with id: #{doc["id"]} #{response.body}") end + + response end @spec commit(String.t()) :: {:ok, Req.Response.t()} | {:error, Exception.t()} diff --git a/test/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry_test.exs b/test/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry_test.exs index 0d5c0d32..9f5d333f 100644 --- a/test/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry_test.exs +++ b/test/dpul_collections/indexing_pipeline/figgy/hydration_cache_entry_test.exs @@ -282,5 +282,24 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntryTest do assert capture_log(fn -> HydrationCacheEntry.to_solr_document(entry) end) =~ "couldn't parse date" end + + test "an empty solr document is returned with a empty title field" do + {:ok, entry} = + IndexingPipeline.write_hydration_cache_entry(%{ + cache_version: 0, + record_id: "f134f41f-63c5-4fdf-b801-0774e3bc3b2d", + source_cache_order: ~U[2018-03-09 20:19:36.465203Z], + data: %{ + "id" => "f134f41f-63c5-4fdf-b801-0774e3bc3b2d", + "internal_resource" => "EphemeraFolder", + "metadata" => %{ + "title" => [], + "date_created" => ["2022"] + } + } + }) + + assert %{title_txtm: ["[Missing Title]"]} = HydrationCacheEntry.to_solr_document(entry) + end end end diff --git a/test/dpul_collections/indexing_pipeline/integration/figgy/indexing_integration_test.exs b/test/dpul_collections/indexing_pipeline/integration/figgy/indexing_integration_test.exs index ac47edc5..5963ae3f 100644 --- a/test/dpul_collections/indexing_pipeline/integration/figgy/indexing_integration_test.exs +++ b/test/dpul_collections/indexing_pipeline/integration/figgy/indexing_integration_test.exs @@ -115,6 +115,22 @@ defmodule DpulCollections.IndexingPipeline.Figgy.IndexingIntegrationTest do indexer |> Broadway.stop(:normal) end + test "when cache entry has an empty document, solr doesn't index it or return an error" do + IndexingPipeline.write_transformation_cache_entry(%{ + cache_version: 0, + record_id: "17276197-e223-471c-99d7-405c5f6c5285", + source_cache_order: ~U[1999-03-09 20:19:34.486004Z], + data: %{} + }) + + indexer = start_indexing_producer() + MockFiggyIndexingProducer.process(1) + assert_receive {:ack_done}, 500 + Solr.commit(active_collection()) + assert Solr.document_count() == 0 + indexer |> Broadway.stop(:normal) + end + test "solr collection creation" do cache_version = 0 new_collection = "new_index1" diff --git a/test/dpul_collections/solr_test.exs b/test/dpul_collections/solr_test.exs index 9b9c91f8..912371ef 100644 --- a/test/dpul_collections/solr_test.exs +++ b/test/dpul_collections/solr_test.exs @@ -209,8 +209,6 @@ defmodule DpulCollections.SolrTest do "id" => "3cb7627b-defc-401b-9959-42ebc4488f74" } - # Solr.commit(active_collection()) - assert capture_log(fn -> Solr.add([doc], active_collection()) end) =~ "error indexing solr document" end