Skip to content

Commit

Permalink
Validate that solr doc has title
Browse files Browse the repository at this point in the history
  • Loading branch information
eliotjordan committed Nov 25, 2024
1 parent 2fb2e09 commit d196044
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,16 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do
page_count_i: page_count(metadata),
image_service_urls_ss: image_service_urls(metadata, related_data)
}
|> validate_solr_document()
end

# Rejects Solr documents that carry no title.
#
# When `title_txtm` is `nil` or an empty list, a warning containing the
# document id is logged and an empty map is returned in place of the document.
# Every other document is returned unchanged.
#
# NOTE(review): the empty map looks like a "skip this document" sentinel —
# callers that post to Solr are expected to drop empty maps before sending;
# confirm that every indexing path does so.
defp validate_solr_document(%{id: id, title_txtm: title}) when title in [nil, []] do
  # An empty list holds no title values either, so it is treated like `nil`.
  Logger.warning("Solr document is missing title: #{id}")
  %{}
end

defp validate_solr_document(doc), do: doc

defp image_service_urls(%{"member_ids" => member_ids}, related_data) do
member_ids
|> Enum.map(&extract_service_url(&1, related_data))
Expand Down
4 changes: 2 additions & 2 deletions lib/dpul_collections/solr.ex
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ defmodule DpulCollections.Solr do
response =
Req.post!(
update_url(collection),
json: docs
json: docs |> Enum.filter(fn doc -> map_size(doc) > 0 end)
)

if response.status != 200 do
Expand All @@ -114,7 +114,7 @@ defmodule DpulCollections.Solr do
response =
Req.post!(
update_url(collection),
json: docs
json: docs |> Enum.filter(fn doc -> map_size(doc) > 0 end)
)

if response.status != 200 do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,5 +282,43 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntryTest do
assert capture_log(fn -> HydrationCacheEntry.to_solr_document(entry) end) =~
"couldn't parse date"
end

@tag capture_log: true
test "an empty solr document is returned with a missing title" do
  # The metadata below deliberately has no "title" key.
  record_id = "f134f41f-63c5-4fdf-b801-0774e3bc3b2d"

  cache_attrs = %{
    cache_version: 0,
    record_id: record_id,
    source_cache_order: ~U[2018-03-09 20:19:36.465203Z],
    data: %{
      "id" => record_id,
      "internal_resource" => "EphemeraFolder",
      "metadata" => %{"date_created" => ["2022"]}
    }
  }

  {:ok, entry} = IndexingPipeline.write_hydration_cache_entry(cache_attrs)

  # A title-less entry must convert to the empty map.
  solr_doc = HydrationCacheEntry.to_solr_document(entry)
  assert solr_doc == %{}
end

test "a message is logged when a solr document is missing a title" do
  # Same title-less fixture as the empty-document test; here only the log
  # output of the conversion is inspected.
  record_id = "f134f41f-63c5-4fdf-b801-0774e3bc3b2d"

  cache_attrs = %{
    cache_version: 0,
    record_id: record_id,
    source_cache_order: ~U[2018-03-09 20:19:36.465203Z],
    data: %{
      "id" => record_id,
      "internal_resource" => "EphemeraFolder",
      "metadata" => %{"date_created" => ["2022"]}
    }
  }

  {:ok, entry} = IndexingPipeline.write_hydration_cache_entry(cache_attrs)

  logged = capture_log(fn -> HydrationCacheEntry.to_solr_document(entry) end)
  assert logged =~ "missing title: f134f41f-63c5-4fdf-b801-0774e3bc3b2d"
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,22 @@ defmodule DpulCollections.IndexingPipeline.Figgy.IndexingIntegrationTest do
indexer |> Broadway.stop(:normal)
end

test "when cache entry has an empty document, solr doesn't index it or return an error" do
  # A transformation cache entry whose document body is the empty map.
  empty_entry_attrs = %{
    cache_version: 0,
    record_id: "17276197-e223-471c-99d7-405c5f6c5285",
    source_cache_order: ~U[1999-03-09 20:19:34.486004Z],
    data: %{}
  }

  IndexingPipeline.write_transformation_cache_entry(empty_entry_attrs)

  indexer = start_indexing_producer()
  MockFiggyIndexingProducer.process(1)

  # The message must still be acknowledged even though nothing is indexed.
  assert_receive {:ack_done}, 500

  Solr.commit(active_collection())
  assert Solr.document_count() == 0

  Broadway.stop(indexer, :normal)
end

test "solr collection creation" do
cache_version = 0
new_collection = "new_index1"
Expand Down
13 changes: 13 additions & 0 deletions test/dpul_collections/solr_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -232,4 +232,17 @@ defmodule DpulCollections.SolrTest do
Solr.commit(active_collection())
assert Solr.find_by_id(valid_doc["id"])["id"] == valid_doc["id"]
end

test "a valid solr document is indexed when in the same batch as an empty document" do
  doc_id = "e0602353-4429-4405-b080-064952f9b267"
  titled_doc = %{"id" => doc_id, "title_txtm" => ["test title 1"]}
  blank_doc = %{}

  # Submit both documents in a single batch; the titled one must still be
  # findable after the commit.
  Solr.add([titled_doc, blank_doc], active_collection())
  Solr.commit(active_collection())

  found = Solr.find_by_id(titled_doc["id"])
  assert found["id"] == titled_doc["id"]
end
end

0 comments on commit d196044

Please sign in to comment.