Skip to content

Commit

Permalink
Assign default when title is missing
Browse files Browse the repository at this point in the history
  • Loading branch information
eliotjordan committed Nov 26, 2024
1 parent ad12c36 commit c0b4c2d
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,15 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do

%{
id: id,
title_txtm: get_in(metadata, ["title"]),
title_txtm: get_in(metadata, ["title"]) || ["Missing Title"],
description_txtm: get_in(metadata, ["description"]),
years_is: extract_years(data),
display_date_s: format_date(metadata),
page_count_i: page_count(metadata),
image_service_urls_ss: image_service_urls(metadata, related_data)
}
# Remove k,v with empty values
|> Enum.filter(fn {k, v} -> v end)
|> Map.new()
|> validate_solr_document()
end

# Final gate applied to a freshly built Solr document.
#
# A map that carries a :title_txtm key is handed back unchanged. A map without
# that key is swapped for an empty map and a warning naming the document id is
# logged. Anything that is not a map is returned untouched.
defp validate_solr_document(%{title_txtm: _title} = solr_doc), do: solr_doc

defp validate_solr_document(%{} = solr_doc) do
  Logger.warning("Solr document is missing title: #{solr_doc[:id]}")
  %{}
end

defp validate_solr_document(other), do: other

defp image_service_urls(%{"member_ids" => member_ids}, related_data) do
member_ids
|> Enum.map(&extract_service_url(&1, related_data))
Expand All @@ -60,6 +49,8 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do
extract_service_url(member_data[id])
end

defp extract_service_url(_id, _), do: nil

# Find the derivative FileMetadata
defp extract_service_url(%{
"internal_resource" => "FileSet",
Expand Down Expand Up @@ -93,8 +84,6 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntry do

defp extract_service_url(nil), do: nil

defp extract_service_url(_id, _), do: nil

defp is_derivative(%{
"mime_type" => ["image/tiff"],
"use" => [%{"@id" => "http://pcdm.org/use#ServiceFile"}]
Expand Down
12 changes: 9 additions & 3 deletions lib/dpul_collections/solr.ex
Original file line number Diff line number Diff line change
Expand Up @@ -98,29 +98,35 @@ defmodule DpulCollections.Solr do
end

@doc """
Posts `docs` to the update endpoint of `collection`.

A batch of two or more documents is sent in a single request. When that
request does not come back with HTTP 200, every document is re-sent on its
own to the same `collection`, so the single-document clause can log exactly
which documents were rejected.

A single document is sent directly; a warning is logged when the response
status is not 200.

Returns the `Req.Response` of the request made for `docs`. The request is made
with `Req.post!/2`, so failures surface as raised exceptions rather than as
`{:error, reason}` tuples.
"""
@spec add(list(map()), String.t()) :: Req.Response.t()
def add(docs, collection \\ read_collection())

# Two or more documents: try the whole batch first.
def add([_, _ | _] = docs, collection) do
  response = Req.post!(update_url(collection), json: docs)

  if response.status != 200 do
    # Retry one by one against the SAME collection the caller asked for;
    # omitting the argument would silently fall back to the default collection.
    Enum.each(docs, fn doc -> add([doc], collection) end)
  end

  response
end

# Exactly one document: post it and report a failure by id.
def add([doc], collection) do
  response = Req.post!(update_url(collection), json: [doc])

  if response.status != 200 do
    # Documents may be keyed by strings or by atoms, so look the id up both ways.
    id = doc["id"] || doc[:id]
    Logger.warning("error indexing solr document with id: #{id} #{response.body}")
  end

  response
end

@spec commit(String.t()) :: {:ok, Req.Response.t()} | {:error, Exception.t()}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,6 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntryTest do
"couldn't parse date"
end

@tag capture_log: true
test "an empty solr document is returned with a missing title" do
{:ok, entry} =
IndexingPipeline.write_hydration_cache_entry(%{
Expand All @@ -299,26 +298,7 @@ defmodule DpulCollections.IndexingPipeline.Figgy.HydrationCacheEntryTest do
}
})

assert HydrationCacheEntry.to_solr_document(entry) == %{}
end

test "a message is logged when a solr document is missing a title" do
{:ok, entry} =
IndexingPipeline.write_hydration_cache_entry(%{
cache_version: 0,
record_id: "f134f41f-63c5-4fdf-b801-0774e3bc3b2d",
source_cache_order: ~U[2018-03-09 20:19:36.465203Z],
data: %{
"id" => "f134f41f-63c5-4fdf-b801-0774e3bc3b2d",
"internal_resource" => "EphemeraFolder",
"metadata" => %{
"date_created" => ["2022"]
}
}
})

assert capture_log(fn -> HydrationCacheEntry.to_solr_document(entry) end) =~
"missing title: f134f41f-63c5-4fdf-b801-0774e3bc3b2d"
assert %{title_txtm: ["Missing Title"]} = HydrationCacheEntry.to_solr_document(entry)
end
end
end
15 changes: 0 additions & 15 deletions test/dpul_collections/solr_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,6 @@ defmodule DpulCollections.SolrTest do
"id" => "3cb7627b-defc-401b-9959-42ebc4488f74"
}

# Solr.commit(active_collection())

assert capture_log(fn -> Solr.add([doc], active_collection()) end) =~
"error indexing solr document"
end
Expand All @@ -232,17 +230,4 @@ defmodule DpulCollections.SolrTest do
Solr.commit(active_collection())
assert Solr.find_by_id(valid_doc["id"])["id"] == valid_doc["id"]
end

# Indexes a batch made of one titled document and one empty map, commits, and
# checks that the titled document can be looked up by its id afterwards.
test "a valid solr document is indexed when in the same batch as an empty document" do
  doc_id = "e0602353-4429-4405-b080-064952f9b267"
  indexable = %{"id" => doc_id, "title_txtm" => ["test title 1"]}
  batch = [indexable, %{}]

  Solr.add(batch, active_collection())
  Solr.commit(active_collection())

  found = Solr.find_by_id(doc_id)
  assert found["id"] == doc_id
end
end

0 comments on commit c0b4c2d

Please sign in to comment.