From 0bb6b009bc24e7bc7e4c935876686e90031c7def Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Tue, 20 Aug 2024 22:24:19 +0000 Subject: [PATCH 1/2] Update Ark ANVL escaping to include :, \r, \n --- app/lib/meadow/ark/serializer.ex | 15 ++++++- app/test/meadow/ark/serializer_test.exs | 54 +++++++++++++++++++------ 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/app/lib/meadow/ark/serializer.ex b/app/lib/meadow/ark/serializer.ex index 909f6b89d..0dc0d059f 100644 --- a/app/lib/meadow/ark/serializer.ex +++ b/app/lib/meadow/ark/serializer.ex @@ -48,6 +48,17 @@ defmodule Meadow.Ark.Serializer do |> Enum.join("\n") end - def serialize({key, value}) when is_atom(key), - do: Map.get(@datacite_map, key) <> ": " <> String.replace(value, "%", "%25") + def serialize({key, value}) when is_atom(key) do + escapable = + case key do + :target -> "%\r\n" + _ -> ":%\r\n" + end + + [ + Map.get(@datacite_map, key), + URI.encode(value, fn c -> not String.contains?(escapable, <<c>>) end) + ] + |> Enum.join(": ") + end end diff --git a/app/test/meadow/ark/serializer_test.exs b/app/test/meadow/ark/serializer_test.exs index beda91905..09aa2014f 100644 --- a/app/test/meadow/ark/serializer_test.exs +++ b/app/test/meadow/ark/serializer_test.exs @@ -3,7 +3,16 @@ defmodule Meadow.Ark.SerializerTest do alias Meadow.Ark.Serializer - @response_body "success: ark:/99999/fk4z90ps4x\n_updated: 1630613597\ndatacite.publisher: Test publisher\n_profile: datacite\ndatacite.title: Test title\n_export: yes\ndatacite.creator: Test creator\n_owner: apitest\n_ownergroup: apitest\n_target: https://test/items/123\n_created: 1630613597\ndatacite.publicationyear: 2021\ndatacite.resourcetype: Image\n_status: public\n" + @request_payload """ + _profile: datacite + datacite.creator: Test %25 creator + datacite.publicationyear: 2021 + datacite.publisher: Publisher%3A Test + datacite.resourcetype: Image + _status: public + _target: https://test/items/123 + datacite.title: 100%25 + """ describe 
"serialize/1" do test "desconstructs a Meadow.Ark and properly handles ANVL escaping of % characters" do @@ -11,29 +20,48 @@ defmodule Meadow.Ark.SerializerTest do ark: "ark:/99999/fk4z90ps4x", creator: "Test % creator", publication_year: "2021", - publisher: "%Test publisher%", + publisher: "Publisher: Test", resource_type: "Image", status: "public", target: "https://test/items/123", title: "100%" } - assert Serializer.serialize(ark) == "_profile: datacite\ndatacite.creator: Test %25 creator\ndatacite.publicationyear: 2021\ndatacite.publisher: %25Test publisher%25\ndatacite.resourcetype: Image\n_status: public\n_target: https://test/items/123\ndatacite.title: 100%25" + assert Serializer.serialize(ark) == String.trim(@request_payload) end end + @response_body """ + success: ark:/99999/fk4z90ps4x + _updated: 1630613597 + datacite.publisher: Test publisher + _profile: datacite + datacite.title: Test title + _export: yes + datacite.creator: Test creator + _owner: apitest + _ownergroup: apitest + _target: https://test/items/123 + _created: 1630613597 + datacite.publicationyear: 2021 + datacite.resourcetype: Image + _status: public + """ + describe "deserialize/1" do test "builds a Meadow.Ark struct" do - assert %Meadow.Ark{ - ark: "ark:/99999/fk4z90ps4x", - creator: "Test creator", - publication_year: "2021", - publisher: "Test publisher", - resource_type: "Image", - status: "public", - target: "https://test/items/123", - title: "Test title" - } = Serializer.deserialize(@response_body) + expected = %Meadow.Ark{ + ark: "ark:/99999/fk4z90ps4x", + creator: "Test creator", + publication_year: "2021", + publisher: "Test publisher", + resource_type: "Image", + status: "public", + target: "https://test/items/123", + title: "Test title" + } + + assert Serializer.deserialize(@response_body) == expected end end end From 58e33aebc692f6a98eefdbfa04bdbfd7f2b69aff Mon Sep 17 00:00:00 2001 From: "Michael B. 
Klein" Date: Tue, 20 Aug 2024 22:24:33 +0000 Subject: [PATCH 2/2] Fix compiler warnings --- app/lib/meadow/search/bulk.ex | 6 +++--- app/lib/meadow/search/config.ex | 2 -- app/lib/meadow/utils/arks.ex | 2 +- app/lib/meadow_web/resolvers/data.ex | 14 -------------- 4 files changed, 4 insertions(+), 20 deletions(-) diff --git a/app/lib/meadow/search/bulk.ex b/app/lib/meadow/search/bulk.ex index 8c86572fc..d26d73bb3 100644 --- a/app/lib/meadow/search/bulk.ex +++ b/app/lib/meadow/search/bulk.ex @@ -40,16 +40,16 @@ defmodule Meadow.Search.Bulk do defp upload_batch(docs, index) do with_log_metadata module: __MODULE__, index: index do bulk_document = docs |> Enum.join("\n") - + Logger.info("Uploading batch of #{Enum.count(docs)} documents to #{index}") - + case HTTP.post("/#{index}/_bulk", bulk_document <> "\n") do {:ok, %{status_code: status} = response} -> Logger.info("Bulk upload status: #{status}") {:ok, response} {:retry, response} -> - Logger.warn("Bulk upload retrying") + Logger.warning("Bulk upload retrying") {:retry, response} {:error, error} -> diff --git a/app/lib/meadow/search/config.ex b/app/lib/meadow/search/config.ex index 13c3ee1bf..300437505 100644 --- a/app/lib/meadow/search/config.ex +++ b/app/lib/meadow/search/config.ex @@ -2,8 +2,6 @@ defmodule Meadow.Search.Config do @moduledoc """ Convenience methods for retrieving search-specific configuration """ - alias Meadow.Search.HTTP - require Logger def index_configs do diff --git a/app/lib/meadow/utils/arks.ex b/app/lib/meadow/utils/arks.ex index 74cf9677a..f85a61608 100644 --- a/app/lib/meadow/utils/arks.ex +++ b/app/lib/meadow/utils/arks.ex @@ -52,7 +52,7 @@ defmodule Meadow.Arks do """ def mint_ark(%Work{descriptive_metadata: %{ark: ark}} = work) when not is_nil(ark) do - Logger.warn("Not minting ARK for work #{work.id} because it already has one: #{ark}") + Logger.warning("Not minting ARK for work #{work.id} because it already has one: #{ark}") {:noop, work} end diff --git 
a/app/lib/meadow_web/resolvers/data.ex b/app/lib/meadow_web/resolvers/data.ex index 2978417b1..3f9d83152 100644 --- a/app/lib/meadow_web/resolvers/data.ex +++ b/app/lib/meadow_web/resolvers/data.ex @@ -164,20 +164,6 @@ defmodule MeadowWeb.Resolvers.Data do end end - def replace_file_set(_, %{id: id} = params, _) do - file_set = FileSets.get_file_set!(id) - - case Pipeline.replace_the_file_set(file_set, Map.delete(params, :id)) do - {:error, changeset} -> - {:error, - message: "Could not replace file set", - details: ChangesetErrors.humanize_errors(changeset)} - - {:ok, file_set} -> - {:ok, file_set} - end - end - def update_file_set(_, %{id: id} = params, _) do file_set = FileSets.get_file_set!(id)