Skip to content

Commit

Permalink
Use mainTitle and fall back to name; do not update if nothing changes
Browse files Browse the repository at this point in the history
  • Loading branch information
amontenegro committed Jun 28, 2023
1 parent 2b66222 commit 117971b
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,24 @@ private IssnData extractIssnData(String json) throws JSONException {
String cleanTitle = cleanText(title);
issnData.setMainTitle(cleanTitle);
return issnData;
} else if (jsonArray.getJSONObject(i).has("name")) {
// name and mainTitle always in same object - therefore if
// no mainTitle but name present, no mainTitle in data
}
}
// If we reach this point, no mainTitle was available.
// Let's iterate again, this time looking for the "name" key.
for (int i = 0; i < jsonArray.length(); i++) {
if (jsonArray.getJSONObject(i).has("name")) {
try {
issnData.setMainTitle(jsonArray.getJSONObject(i).getJSONArray("name").getString(0));
String title = jsonArray.getJSONObject(i).getString("name");
issnData.setMainTitle(cleanText(title));
} catch (JSONException e) {
// may not be an array
issnData.setMainTitle(jsonArray.getJSONObject(i).getString("name"));
// may be an array
try {
String title = jsonArray.getJSONObject(i).getJSONArray("name").getString(0);
issnData.setMainTitle(cleanText(title));
} catch(Exception ee) {
// Nothing else to try, propagate the exception
throw ee;
}
}
return issnData;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import javax.annotation.Resource;

import org.apache.commons.lang3.StringUtils;
import org.orcid.core.groupIds.issn.IssnClient;
import org.orcid.core.groupIds.issn.IssnData;
import org.orcid.core.groupIds.issn.IssnValidator;
Expand All @@ -18,6 +19,7 @@
import org.orcid.persistence.jpa.entities.InvalidIssnGroupIdRecordEntity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

@Component
Expand All @@ -27,8 +29,12 @@ public class IssnLoadSource {

private static Pattern issnGroupTypePattern = Pattern.compile("^issn:(\\d{4}-\\d{3}[\\dXx])$");

private static final int BATCH_SIZE = 30;
@Value("${org.orcid.scheduler.issnLoadSource.batchSize:50}")
private int batchSize;

@Value("${org.orcid.scheduler.issnLoadSource.waitBetweenBatches:30000}")
private int waitBetweenBatches;

@Resource
private GroupIdRecordDao groupIdRecordDao;

Expand Down Expand Up @@ -64,34 +70,39 @@ public void loadIssn(String issnSource) {

private void updateIssnGroupIdRecords() {
Date start = new Date();
List<GroupIdRecordEntity> issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsNotModifiedSince(BATCH_SIZE, start);
List<GroupIdRecordEntity> issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsNotModifiedSince(batchSize, start);
int count = 0;
while (!issnEntities.isEmpty()) {
for (GroupIdRecordEntity issnEntity : issnEntities) {
String issn = getIssn(issnEntity);
if (issn != null && issnValidator.issnValid(issn)) {
count++;
IssnData issnData = issnClient.getIssnData(issn);
if (issnData != null) {
updateIssnEntity(issnEntity, issnData);
count++;
try {
LOG.info("Updated group id record {} - {}, processed count now {}",
new Object[] { issnEntity.getId(), issnEntity.getGroupId(), Integer.toString(count) });
Thread.sleep(10000l);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
LOG.info("Updated group id record {} - {}, processed count now {}",
new Object[] { issnEntity.getId(), issnEntity.getGroupId(), Integer.toString(count) });
} else {
LOG.warn("ISSN data not found for {}", issn);
recordFailure(issnEntity.getId(), "Data not found");
}
} else {
LOG.info("Issn for group record {} not valid: {}", issnEntity.getId(), issnEntity.getGroupId());
recordFailure(issnEntity.getId(), "Invalid record");
}
try {
// Sleep for waitBetweenBatches ms (30 seconds by default) once a batch's worth of records has been processed
if(count >= batchSize) {
Thread.sleep(waitBetweenBatches);
// Reset the count
count = 0;
}
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsNotModifiedSince(BATCH_SIZE, start);
issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsNotModifiedSince(batchSize, start);
}
}

Expand All @@ -104,10 +115,16 @@ private void recordFailure(Long id, String notes) {

private void updateIssnEntity(GroupIdRecordEntity issnEntity, IssnData issnData) {
String currentGroupName = issnEntity.getGroupName();
issnEntity.setGroupName(issnData.getMainTitle());
issnEntity.setClientSourceId(orcidSource.getId());
LOG.info("group id: " + issnEntity.getGroupId() + " | current group name: " + currentGroupName + " | group name to be updated: " + issnEntity.getGroupName());
groupIdRecordDao.merge(issnEntity);
String updatedGroupName = issnData.getMainTitle();

if(!StringUtils.equals(currentGroupName, updatedGroupName)) {
issnEntity.setGroupName(updatedGroupName);
issnEntity.setClientSourceId(orcidSource.getId());
LOG.info("group id: " + issnEntity.getGroupId() + " | current group name: " + currentGroupName + " | group name to be updated: " + issnEntity.getGroupName());
groupIdRecordDao.merge(issnEntity);
} else {
LOG.info("Group id: " + issnEntity.getGroupId() + " is up to date");
}
}

private String getIssn(GroupIdRecordEntity issnEntity) {
Expand Down

0 comments on commit 117971b

Please sign in to comment.