Skip to content

Commit

Permalink
For ISSN loading, use the table id to iterate over the existing list …
Browse files Browse the repository at this point in the history
…of issns and process them just once
  • Loading branch information
amontenegro committed Jul 31, 2023
1 parent 771ad25 commit 0ba9124
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.orcid.persistence.dao;

import java.util.Date;
import java.util.List;

import org.orcid.persistence.jpa.entities.GroupIdRecordEntity;
Expand All @@ -17,5 +16,5 @@ public interface GroupIdRecordDao extends GenericDao<GroupIdRecordEntity, Long>

boolean duplicateExists(Long putCode, String groupId);

List<GroupIdRecordEntity> getIssnRecordsNotModifiedSince(int batchSize, Date start);
List<GroupIdRecordEntity> getIssnRecordsSortedById(int batchSize, long initialId);
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.orcid.persistence.dao.impl;

import java.util.Date;
import java.util.List;

import javax.persistence.Query;
Expand All @@ -9,8 +8,8 @@
import org.orcid.persistence.dao.GroupIdRecordDao;
import org.orcid.persistence.jpa.entities.GroupIdRecordEntity;

public class GroupIdRecordDaoImpl extends GenericDaoImpl<GroupIdRecordEntity, Long> implements GroupIdRecordDao {

public class GroupIdRecordDaoImpl extends GenericDaoImpl<GroupIdRecordEntity, Long> implements GroupIdRecordDao {
/**
 * Creates the DAO, passing {@code GroupIdRecordEntity.class} to the
 * {@code GenericDaoImpl} superclass constructor.
 */
public GroupIdRecordDaoImpl() {
super(GroupIdRecordEntity.class);
}
Expand Down Expand Up @@ -71,13 +70,13 @@ public boolean duplicateExists(Long putCode, String groupId) {
}
Long result = query.getSingleResult();
return (result != null && result > 0);
}

@SuppressWarnings("unchecked")
}

@Override
public List<GroupIdRecordEntity> getIssnRecordsNotModifiedSince(int pageSize, Date date) {
Query query = entityManager.createNativeQuery("SELECT * FROM group_id_record g LEFT OUTER JOIN invalid_issn_group_id_record p ON g.id = p.id where p.id IS NULL AND g.group_id like 'issn:%' and g.last_modified < :date", GroupIdRecordEntity.class);
query.setParameter("date", date);
public List<GroupIdRecordEntity> getIssnRecordsSortedById(int batchSize, long initialId) {
    // Fetches one batch of ISSN group id records for id-based iteration:
    //  - only rows whose group_id starts with 'issn:'
    //  - rows with a matching id in invalid_issn_group_id_record are excluded
    //    (LEFT OUTER JOIN combined with "p.id IS NULL")
    //  - only rows whose id is strictly greater than initialId
    //  - ordered by id, capped at batchSize rows
    // Callers can pass the largest id of the previous batch as initialId to
    // obtain the following batch.
    // NOTE(review): "SELECT *" over the join returns the columns of both
    // tables; confirm the entity mapping is unaffected, or consider "g.*".
    final String issnBatchSql = "SELECT * FROM group_id_record g"
            + " LEFT OUTER JOIN invalid_issn_group_id_record p ON g.id = p.id"
            + " where p.id IS NULL AND g.group_id like 'issn:%' and g.id > :initialId"
            + " order by g.id";

    Query issnBatchQuery = entityManager.createNativeQuery(issnBatchSql, GroupIdRecordEntity.class)
            .setParameter("initialId", initialId)
            .setMaxResults(batchSize);
    return issnBatchQuery.getResultList();
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.orcid.scheduler.loader.source.issn;

import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -29,7 +28,7 @@ public class IssnLoadSource {

private static Pattern issnGroupTypePattern = Pattern.compile("^issn:(\\d{4}-\\d{3}[\\dXx])$");

@Value("${org.orcid.scheduler.issnLoadSource.batchSize:50}")
@Value("${org.orcid.scheduler.issnLoadSource.batchSize:5000}")
private int batchSize;

@Value("${org.orcid.scheduler.issnLoadSource.waitBetweenBatches:30000}")
Expand Down Expand Up @@ -69,8 +68,10 @@ public void loadIssn(String issnSource) {
}

private void updateIssnGroupIdRecords() {
Date start = new Date();
List<GroupIdRecordEntity> issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsNotModifiedSince(batchSize, start);
Long nextBatchStartId = 0L;
// Get the first batch of issn's
LOG.info("Loading batch of ISSN's, starting id: " + nextBatchStartId + " batch size: " + batchSize);
List<GroupIdRecordEntity> issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsSortedById(batchSize, nextBatchStartId);
int batchCount = 0;
int total = 0;
while (!issnEntities.isEmpty()) {
Expand Down Expand Up @@ -104,8 +105,13 @@ private void updateIssnGroupIdRecords() {
// TODO Auto-generated catch block
LOG.warn("Exception while pausing the issn loader", e);
}

if (issnEntity.getId() > nextBatchStartId) {
nextBatchStartId = issnEntity.getId();
}
}
issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsNotModifiedSince(batchSize, start);
LOG.info("Loading batch of ISSN's, starting id: " + nextBatchStartId);
issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsSortedById(batchSize, nextBatchStartId);
}
}

Expand Down

0 comments on commit 0ba9124

Please sign in to comment.