Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use mainTitle and fallback to name, do not update if nothing changes #6827

Merged
merged 5 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ public class IssnClient {
private static final String START_OF_STRING = "\u0098";
private static final String STRING_TERMINATOR = "\u009C";

private static final String RESOURCE_MAIN = "resource/ISSN/%issn";
private static final String RESOURCE_KEY_TITLE = "resource/ISSN/%issn#KeyTitle";

@Resource
private IssnPortalUrlBuilder issnPortalUrlBuilder;

Expand All @@ -44,7 +47,7 @@ public IssnData getIssnData(String issn) {
}
try {
if (json != null) {
IssnData data = extractIssnData(json);
IssnData data = extractIssnData(issn.toUpperCase(), json);
data.setIssn(issn);
return data;
} else {
Expand All @@ -56,31 +59,72 @@ public IssnData getIssnData(String issn) {
}
}

private IssnData extractIssnData(String json) throws JSONException {
private IssnData extractIssnData(String issn, String json) throws JSONException {
LOG.info("Extracting json data from " + issn);
JSONObject jsonObject = new JSONObject(json);
JSONArray jsonArray = jsonObject.getJSONArray("@graph");
IssnData issnData = new IssnData();
if (issnData != null) {
JSONArray jsonArray = jsonObject.getJSONArray("@graph");
if (jsonArray != null) {
IssnData issnData = new IssnData();
// Look for the KeyTitle element
for (int i = 0; i < jsonArray.length(); i++) {
if (jsonArray.getJSONObject(i).has("mainTitle")) {
String title = jsonArray.getJSONObject(i).getString("mainTitle");
String cleanTitle = cleanText(title);
issnData.setMainTitle(cleanTitle);
return issnData;
} else if (jsonArray.getJSONObject(i).has("name")) {
// name and mainTitle always in same object - therefore if
// no mainTitle but name present, no mainTitle in data
try {
issnData.setMainTitle(jsonArray.getJSONObject(i).getJSONArray("name").getString(0));
} catch (JSONException e) {
// may not be an array
issnData.setMainTitle(jsonArray.getJSONObject(i).getString("name"));
JSONObject obj = jsonArray.getJSONObject(i);
if (obj.has("@id")) {
String idName = obj.getString("@id");
if (idName.equals(RESOURCE_KEY_TITLE.replace("%issn", issn))) {
String title = obj.getString("value");
String cleanTitle = cleanText(title);
issnData.setMainTitle(cleanTitle);
LOG.debug("Found KeyTitle for '" + issn + "' " + cleanTitle);
return issnData;
}
}
}

// If it can't find it, look for the main resource and extract the
// mainTitle from it or the first element in the name array
for (int i = 0; i < jsonArray.length(); i++) {
JSONObject obj = jsonArray.getJSONObject(i);
if (obj.has("@id")) {
String idName = obj.getString("@id");
if (idName.equals(RESOURCE_MAIN.replace("%issn", issn))) {
LOG.debug("Found main resource for " + issn);
// Look for the mainTitle
if (obj.has("mainTitle")) {
String title = obj.getString("mainTitle");
String cleanTitle = cleanText(title);
issnData.setMainTitle(cleanTitle);
LOG.debug("Found mainTitle for '" + issn + "' " + cleanTitle);
return issnData;
} else if (obj.has("name")) {
LOG.debug("Found name array for " + issn);
// If the mainTitle is not available, look for the
// name array
Object nameObject = jsonArray.getJSONObject(i).get("name");
if (nameObject instanceof JSONArray) {
String title = jsonArray.getJSONObject(i).getJSONArray("name").getString(0);
String cleanTitle = cleanText(title);
issnData.setMainTitle(cleanTitle);
LOG.debug("Found KeyTitle for '" + issn + "' " + cleanTitle);
return issnData;
} else if (nameObject instanceof String) {
String title = jsonArray.getJSONObject(i).getString("name");
String cleanTitle = cleanText(title);
issnData.setMainTitle(cleanTitle);
LOG.debug("Found name for '" + issn + "' " + cleanTitle);
return issnData;
} else {
LOG.warn("Unable to extract name, it is not a string nor an array for " + issn);
throw new IllegalArgumentException("Unable to extract name, it is not a string nor an array for " + issn);
}
} else {
LOG.warn("Unable to extract name, couldn't find the mainTitle nor the name for " + issn);
throw new IllegalArgumentException("Unable to extract name, couldn't find the mainTitle nor the name for " + issn);
}
}
return issnData;
}
}
}
return null;
throw new IllegalArgumentException("Unable to extract name, couldn't find the Key Title nor the main resource for " + issn);
}

private String getJsonDataFromIssnPortal(String issn) throws IOException, InterruptedException, URISyntaxException {
Expand Down
128 changes: 87 additions & 41 deletions orcid-core/src/test/java/org/orcid/core/issn/IssnClientTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
Expand Down Expand Up @@ -41,90 +42,135 @@ public void setUp() throws IOException {
Mockito.when(mockUrlBuilder.buildJsonIssnPortalUrlForIssn(Mockito.anyString())).thenReturn("anything");
ReflectionTestUtils.setField(issnClient, "issnPortalUrlBuilder", mockUrlBuilder);
ReflectionTestUtils.setField(issnClient, "httpRequestUtils", mockHttpRequestUtils);
}

/**
 * Stubs the HTTP layer to answer with status 200 and the key-title fixture,
 * then checks that the returned data carries the expected title and the
 * ISSN that was passed in.
 */
@Test
public void testGetIssnDataUseKeyTitle() throws IOException, JSONException, InterruptedException, URISyntaxException {
    final String requestedIssn = "1755-4349";
    final String expectedTitle = "Nature chemistry (Online) - Key Title";

    // The stubs are independent of each other; set up the response first, then the transport
    when(mockResponse.statusCode()).thenReturn(200);
    when(mockResponse.body()).thenReturn(getJsonInputStreamKeyTitle());
    when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);

    IssnData issnData = issnClient.getIssnData(requestedIssn);

    assertEquals(expectedTitle, issnData.getMainTitle());
    assertEquals(requestedIssn, issnData.getIssn());
}

@Test
public void testGetIssnData() throws IOException, JSONException, InterruptedException, URISyntaxException {
public void testGetIssnDataUseMainTitle() throws IOException, JSONException, InterruptedException, URISyntaxException {
when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);
when(mockResponse.body()).thenReturn(getJsonInputStream());
when(mockResponse.body()).thenReturn(getJsonInputStreamFallbackToMainTitle());
when(mockResponse.statusCode()).thenReturn(200);

IssnData data = issnClient.getIssnData("my-issn-0");
assertEquals("Nature chemistry.", data.getMainTitle());
// Should ignore ISSN from the metadata
assertEquals("my-issn-0", data.getIssn());
IssnData data = issnClient.getIssnData("0260-8774");
assertEquals("Journal of food engineering - Main title", data.getMainTitle());
assertEquals("0260-8774", data.getIssn());
}

@Test
public void testGetIssnDataWhenNoIssnInMetadata() throws IOException, JSONException, InterruptedException, URISyntaxException {
public void testGetIssnDataUseNameArray() throws IOException, JSONException, InterruptedException, URISyntaxException {
when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);
when(mockResponse.body()).thenReturn(getJsonInputStreamNoIssnInMetadata());
when(mockResponse.body()).thenReturn(getJsonInputStreamFallbackToNameArray());
when(mockResponse.statusCode()).thenReturn(200);

IssnData data = issnClient.getIssnData("my-issn-1");
assertEquals("Nature chemistry.", data.getMainTitle());
// Should ignore ISSN from the metadata
assertEquals("my-issn-1", data.getIssn());
IssnData data = issnClient.getIssnData("0260-8774");
assertEquals("Journal of food engineering - Array", data.getMainTitle());
assertEquals("0260-8774", data.getIssn());
}

@Test
public void testGetIssnDataNoMainTitle() throws IOException, JSONException, InterruptedException, URISyntaxException {
public void testGetIssnDataUseNameArrayAsString() throws IOException, JSONException, InterruptedException, URISyntaxException {
when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);
when(mockResponse.body()).thenReturn(getJsonInputStreamNoMainTitle());
when(mockResponse.body()).thenReturn(getJsonInputStreamFallbackToNameArrayAsString());
when(mockResponse.statusCode()).thenReturn(200);

IssnData data = issnClient.getIssnData("my-issn-2");
assertEquals("Journal of food engineering", data.getMainTitle());
// Should ignore ISSN from the metadata
assertEquals("my-issn-2", data.getIssn());
IssnData data = issnClient.getIssnData("0260-8774");
assertEquals("Journal of food engineering - String", data.getMainTitle());
assertEquals("0260-8774", data.getIssn());
}

@Test
public void testGetIssnDataNoMainTitleNameArray() throws IOException, JSONException, InterruptedException, URISyntaxException {
public void testGetIssnDataNoTitle() throws IOException, JSONException, InterruptedException, URISyntaxException {
when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);
when(mockResponse.body()).thenReturn(getJsonInputStreamNoMainTitleNameArray());
when(mockResponse.body()).thenReturn(getJsonInputStreamNoTitle());
when(mockResponse.statusCode()).thenReturn(200);

IssnData data = issnClient.getIssnData("my-issn-3");
assertEquals("Shalom (Glyvrar)", data.getMainTitle());
// Should ignore ISSN from the metadata
assertEquals("my-issn-3", data.getIssn());
IssnData data = issnClient.getIssnData("0260-8774");
assertNull(data);
}

@Test
public void testGetIssnDataBadCharacters() throws IOException, JSONException, InterruptedException, URISyntaxException {
public void testGetIssnDataBadCharactersKeyTitle() throws IOException, JSONException, InterruptedException, URISyntaxException {
when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);
when(mockResponse.body()).thenReturn(getJsonInputStreamBadCharacters());
when(mockResponse.body()).thenReturn(getJsonInputStreamBadCharactersKeyTitle());
when(mockResponse.statusCode()).thenReturn(200);

IssnData data = issnClient.getIssnData("my-issn-2");
assertFalse("\u0098The \u009CJournal of cell biology.".equals(data.getMainTitle()));
assertEquals("The Journal of cell biology.", data.getMainTitle());
assertTrue("\u0098The \u009CJournal of cell biology.".getBytes().length != data.getMainTitle().getBytes().length);
IssnData data = issnClient.getIssnData("0021-9525");
assertFalse("\u0098The \u009CJournal of cell biology - Key Title".equals(data.getMainTitle()));
assertEquals("The Journal of cell biology - Key Title", data.getMainTitle());
assertTrue("\u0098The \u009CJournal of cell biology - Key Title".getBytes().length != data.getMainTitle().getBytes().length);
}

private String getJsonInputStream() throws IOException {
InputStream is = getClass().getResourceAsStream("/issn-response.json");
/**
 * Checks that the title returned for the bad-characters main-title fixture
 * no longer contains the U+0098 / U+009C control characters.
 */
@Test
public void testGetIssnDataBadCharactersMainTitle() throws IOException, JSONException, InterruptedException, URISyntaxException {
    when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);
    when(mockResponse.body()).thenReturn(getJsonInputStreamBadCharactersMainTitle());
    when(mockResponse.statusCode()).thenReturn(200);

    // Title including the control characters the client is expected to strip
    String rawTitle = "\u0098The \u009CJournal of cell biology - Main Title";

    IssnData data = issnClient.getIssnData("0021-9525");
    assertFalse(rawTitle.equals(data.getMainTitle()));
    assertEquals("The Journal of cell biology - Main Title", data.getMainTitle());
    // Compare encoded sizes with an explicit charset so the check does not
    // depend on the platform default encoding
    assertTrue(rawTitle.getBytes(StandardCharsets.UTF_8).length != data.getMainTitle().getBytes(StandardCharsets.UTF_8).length);
}

/**
 * Checks that the title returned for the bad-characters name-array fixture
 * no longer contains the U+0098 / U+009C control characters.
 */
@Test
public void testGetIssnDataBadCharactersNameArray() throws IOException, JSONException, InterruptedException, URISyntaxException {
    when(mockHttpRequestUtils.doGet(any())).thenReturn(mockResponse);
    when(mockResponse.body()).thenReturn(getJsonInputStreamBadCharactersNameArray());
    when(mockResponse.statusCode()).thenReturn(200);

    // Title including the control characters the client is expected to strip
    String rawTitle = "\u0098The \u009CJournal of cell biology - Name Array";

    IssnData data = issnClient.getIssnData("0021-9525");
    assertFalse(rawTitle.equals(data.getMainTitle()));
    assertEquals("The Journal of cell biology - Name Array", data.getMainTitle());
    // Compare encoded sizes with an explicit charset so the check does not
    // depend on the platform default encoding
    assertTrue(rawTitle.getBytes(StandardCharsets.UTF_8).length != data.getMainTitle().getBytes(StandardCharsets.UTF_8).length);
}

/**
 * Reads the {@code /issn-response-key-title.json} classpath resource and
 * returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamKeyTitle() throws IOException {
    String resource = "/issn-response-key-title.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}

private String getJsonInputStreamNoIssnInMetadata() throws IOException {
InputStream is = getClass().getResourceAsStream("/issn-response-no-issn-in-metadata.json");
/**
 * Reads the {@code /issn-response-fallback-to-main-title.json} classpath
 * resource and returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamFallbackToMainTitle() throws IOException {
    String resource = "/issn-response-fallback-to-main-title.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}

private String getJsonInputStreamNoMainTitle() throws IOException {
InputStream is = getClass().getResourceAsStream("/issn-response-no-mainTitle.json");
/**
 * Reads the {@code /issn-response-fallback-to-name-array.json} classpath
 * resource and returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamFallbackToNameArray() throws IOException {
    String resource = "/issn-response-fallback-to-name-array.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}

private String getJsonInputStreamNoMainTitleNameArray() throws IOException {
InputStream is = getClass().getResourceAsStream("/issn-response-no-mainTitle-name-array.json");
/**
 * Reads the {@code /issn-response-fallback-to-name-array-as-string.json}
 * classpath resource and returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamFallbackToNameArrayAsString() throws IOException {
    String resource = "/issn-response-fallback-to-name-array-as-string.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}

/**
 * Reads the {@code /issn-response-no-title.json} classpath resource and
 * returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamNoTitle() throws IOException {
    String resource = "/issn-response-no-title.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}

private String getJsonInputStreamBadCharacters() throws IOException {
InputStream is = getClass().getResourceAsStream("/issn-response-bad-characters.json");
/**
 * Reads the {@code /issn-response-bad-characters-key-title.json} classpath
 * resource and returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamBadCharactersKeyTitle() throws IOException {
    String resource = "/issn-response-bad-characters-key-title.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}
}

/**
 * Reads the {@code /issn-response-bad-characters-main-title.json} classpath
 * resource and returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamBadCharactersMainTitle() throws IOException {
    String resource = "/issn-response-bad-characters-main-title.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}

/**
 * Reads the {@code /issn-response-bad-characters-name-array.json} classpath
 * resource and returns its content decoded as UTF-8.
 *
 * @return the fixture content as a string
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private String getJsonInputStreamBadCharactersNameArray() throws IOException {
    String resource = "/issn-response-bad-characters-name-array.json";
    // try-with-resources closes the stream; a null resource is skipped on close
    try (InputStream is = getClass().getResourceAsStream(resource)) {
        if (is == null) {
            // getResourceAsStream returns null when the resource is missing
            throw new IOException("Test resource not found on classpath: " + resource);
        }
        return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"@id" : "resource/ISSN/0021-9525",
"@type" : [ "http://id.loc.gov/ontologies/bibframe/Work", "http://id.loc.gov/ontologies/bibframe/Instance", "http://schema.org/Periodical" ],
"identifiedBy" : [ "resource/ISSN/0021-9525#KeyTitle", "resource/ISSN/0021-9525#ISSN", "resource/ISSN/0021-9525#ISSN-L" ],
"mainTitle" : "˜\u0098The \u009CJournal of cell biology.",
"mainTitle" : "˜\u0098The \u009CJournal of cell biology - Main Title",
"otherPhysicalFormat" : "resource/ISSN/1540-8140",
"title" : "resource/ISSN/0021-9525#KeyTitle",
"format" : "vocabularies/medium#Print",
Expand All @@ -22,7 +22,7 @@
"http://purl.org/ontology/bibo/issn" : "0021-9525",
"isPartOf" : "resource/ISSN-L/0021-9525",
"issn" : "0021-9525",
"name" : [ "˜The œJournal of cell biology.", "˜The œJournal of cell biology" ],
"name" : [ "˜\u0098The \u009CJournal of cell biology - Name Array"],
"publication" : "resource/ISSN/0021-9525#ReferencePublicationEvent",
"url" : [ "http://www.jcb.org/", "http://www.jstor.org/journals/00219525.html", "http://www.pubmedcentral.nih.gov/tocrender.fcgi?journal=482&action=archive", "http://bibpurl.oclc.org/web/21389" ]
}, {
Expand All @@ -38,7 +38,7 @@
}, {
"@id" : "resource/ISSN/0021-9525#KeyTitle",
"@type" : [ "http://id.loc.gov/ontologies/bibframe/Identifier", "http://id.loc.gov/ontologies/bibframe/KeyTitle" ],
"value" : "˜The œJournal of cell biology"
"value" : "˜\u0098The \u009CJournal of cell biology - Key Title"
}, {
"@id" : "resource/ISSN/0021-9525#Record",
"@type" : "http://schema.org/CreativeWork",
Expand Down
Loading