Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
janvonde committed Sep 5, 2022
2 parents 1bb7cf8 + f8fc2af commit ff02e2a
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 12 deletions.
6 changes: 3 additions & 3 deletions goobi-viewer-indexer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>io.goobi.viewer</groupId>
<artifactId>viewer-indexer</artifactId>
<version>22.07</version>
<version>22.08-SNAPSHOT</version>


<name>Goobi viewer - Indexer</name>
Expand Down Expand Up @@ -50,12 +50,12 @@
<icu.version>71.1</icu.version>
<imageio-openjpeg.version>0.6.2</imageio-openjpeg.version>
<log4j.version>2.18.0</log4j.version>
<jackson.version>2.13.3</jackson.version>
<jackson.version>2.13.4</jackson.version>
<jaxen.version>1.2.0</jaxen.version>
<jai.version>1.4.0</jai.version>
<javax.mail.version>1.6.2</javax.mail.version>
<jdom2.version>2.0.6.1</jdom2.version>
<jsoup.version>1.15.2</jsoup.version>
<jsoup.version>1.15.3</jsoup.version>
<json.version>20220320</json.version>
<junit.version>4.13.2</junit.version>
<metadata-extractor.version>2.18.0</metadata-extractor.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Map.Entry;

import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.json.JSONException;
import org.json.JSONObject;
Expand Down Expand Up @@ -59,8 +60,16 @@ public UsageStatisticsIndexer(Hotfolder hotfolder) {
* @param sourceFile
* @throws IOException
* @throws FatalIndexerException
* @throws SolrServerException
*/
public SolrInputDocument index(Path sourceFile) throws IOException, FatalIndexerException {
public SolrInputDocument index(Path sourceFile) throws IOException, FatalIndexerException, SolrServerException {
String solrDateString = getStatisticsDate(sourceFile);
if(statisticsExists(solrDateString)) {
logger.info("Don't index usage statistics for " + solrDateString + ": Statistics already exist for that date");
return null;
}


String jsonString = Files.readString(sourceFile);
if (StringUtils.isBlank(jsonString)) {
throw new IllegalArgumentException("Usage statistics file {} is empty".replace("{}", sourceFile.toString()));
Expand All @@ -75,13 +84,25 @@ public SolrInputDocument index(Path sourceFile) throws IOException, FatalIndexer
ISolrWriteStrategy writeStrategy = AbstractWriteStrategy.create(sourceFile, Collections.emptyMap(), this.hotfolder);
writeStrategy.setRootDoc(rootDoc);
writeStrategy.writeDocs(Configuration.getInstance().isAggregateRecords());
logger.info("Written usage statistics from " + sourceFile.toString() + " to index with IDDOC " + rootDoc.getFieldValue("IDDOC"));
return rootDoc;
} catch (JSONException | IndexerException e) {
throw new IllegalArgumentException("Usage statistics file {} contains invalid json".replace("{}", sourceFile.toString()));
}

}

/**
 * Checks whether the search index already contains usage statistics for the given date.
 *
 * @param solrDateString date value to match against the {@code StatisticsLuceneFields.DATE} field
 * @return true if the query for that date and the usage statistics doctype yields at least one hit
 * @throws IOException
 * @throws SolrServerException
 */
private boolean statisticsExists(String solrDateString) throws SolrServerException, IOException {
    // Both clauses are mandatory ('+'): the quoted date value and the usage statistics doctype
    StringBuilder statisticsQuery = new StringBuilder();
    statisticsQuery.append("+").append(StatisticsLuceneFields.DATE).append(":\"").append(solrDateString).append("\" +");
    statisticsQuery.append(SolrConstants.DOCTYPE).append(":").append(StatisticsLuceneFields.USAGE_STATISTICS_DOCTYPE);

    return hotfolder.getSearchIndex().getNumHits(statisticsQuery.toString()) > 0;
}

/**
* @param stats
* @return
Expand All @@ -90,6 +111,7 @@ public SolrInputDocument index(Path sourceFile) throws IOException, FatalIndexer
private IndexObject createIndexObject(DailyUsageStatistics stats) throws FatalIndexerException {
IndexObject indexObj = new IndexObject(getNextIddoc(hotfolder.getSearchIndex()));
indexObj.addToLucene(SolrConstants.IDDOC, Long.toString(indexObj.getIddoc()));
indexObj.addToLucene(SolrConstants.GROUPFIELD, Long.toString(indexObj.getIddoc()));
indexObj.addToLucene(SolrConstants.DOCTYPE, StatisticsLuceneFields.USAGE_STATISTICS_DOCTYPE);
indexObj.addToLucene(StatisticsLuceneFields.VIEWER_NAME, stats.getViewerName());
indexObj.addToLucene(StatisticsLuceneFields.DATE, StatisticsLuceneFields.solrDateFormatter.format(stats.getDate().atStartOfDay()));
Expand Down Expand Up @@ -119,17 +141,23 @@ private IndexObject createIndexObject(DailyUsageStatistics stats) throws FatalIn
* @throws FatalIndexerException
*/
public boolean removeFromIndex(Path sourceFile) throws FatalIndexerException {
    // The file name -> Solr date conversion lives in getStatisticsDate(); the value is computed exactly once
    // here so that 'solrDateString' is declared a single time and the parsing logic is not duplicated.
    String solrDateString = getStatisticsDate(sourceFile);

    try {
        // Both clauses are mandatory ('+'): the quoted date value and the usage statistics doctype
        String query = "+" + StatisticsLuceneFields.DATE + ":\"" + solrDateString + "\" +" + SolrConstants.DOCTYPE + ":"
                + StatisticsLuceneFields.USAGE_STATISTICS_DOCTYPE;
        logger.info("Deleting usage statistics for " + StatisticsLuceneFields.DATE + ":" + solrDateString);
        return hotfolder.getSearchIndex().deleteByQuery(query);
    } finally {
        // Commit runs whether or not the delete call completed normally.
        // NOTE(review): the meaning of the 'false' argument is not visible here - confirm against the search index API.
        hotfolder.getSearchIndex().commit(false);
    }
}

/**
 * Derives the Solr date string of a usage statistics file from its file name.
 *
 * The date portion is captured from a name of the form {@code statistics-usage-<date>.<extension>}, parsed with
 * the formatter provided by {@code DailyUsageStatistics.getDateformatter()} and formatted at start of day with
 * {@code StatisticsLuceneFields.solrDateFormatter}. If the file name does not match the pattern, the unchanged
 * name is handed to {@link LocalDate#parse(CharSequence, java.time.format.DateTimeFormatter)}.
 *
 * @param sourceFile path of the usage statistics file
 * @return the formatted date string
 */
private String getStatisticsDate(Path sourceFile) {
    String fileName = sourceFile.getFileName().toString();
    // Keep only the captured date group of the file name
    String extractedDate = fileName.replaceAll("statistics-usage-([\\d-]+).\\w+", "$1");
    LocalDate statisticsDay = LocalDate.parse(extractedDate, DailyUsageStatistics.getDateformatter());
    return StatisticsLuceneFields.solrDateFormatter.format(statisticsDay.atStartOfDay());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ public boolean scan() throws FatalIndexerException {
return true;
}
logger.trace("Hotfolder: Listing files...");
try (DirectoryStream<Path> stream = Files.newDirectoryStream(hotfolderPath, "*.{xml,delete,purge,docupdate,UPDATED}")) {
try (DirectoryStream<Path> stream = Files.newDirectoryStream(hotfolderPath, "*.{xml,json,delete,purge,docupdate,UPDATED}")) {
for (Path path : stream) {
// Only one file at a time right now
if (currentIndexer != null) {
Expand Down Expand Up @@ -648,10 +648,12 @@ private boolean handleSourceFile(Path sourceFile, boolean fromReindexQueue, Map<
} else if (filename.endsWith(".json")) {
if (filename.startsWith(FILENAME_PREFIX_STATISTICS_USAGE)) {
addUsageStatisticsToIndex(sourceFile);
Files.delete(sourceFile);
}
} else if (filename.endsWith(FILENAME_EXTENSION_DELETE)) {
if (filename.startsWith(FILENAME_PREFIX_STATISTICS_USAGE)) {
removeUsageStatisticsFromIndex(sourceFile);
Files.delete(sourceFile);
} else {
// DELETE
DataRepository[] repositories = dataRepositoryStrategy.selectDataRepository(null, sourceFile, null, searchIndex, oldSearchIndex);
Expand All @@ -661,6 +663,7 @@ private boolean handleSourceFile(Path sourceFile, boolean fromReindexQueue, Map<
} else if (filename.endsWith(FILENAME_EXTENSION_PURGE)) {
if (filename.startsWith(FILENAME_PREFIX_STATISTICS_USAGE)) {
removeUsageStatisticsFromIndex(sourceFile);
Files.delete(sourceFile);
} else {
// PURGE (delete with no "deleted" doc)
DataRepository[] repositories = dataRepositoryStrategy.selectDataRepository(null, sourceFile, null, searchIndex, oldSearchIndex);
Expand Down Expand Up @@ -1200,7 +1203,7 @@ private void addUsageStatisticsToIndex(Path sourceFile) {
try {
this.currentIndexer = new UsageStatisticsIndexer(this);
((UsageStatisticsIndexer) this.currentIndexer).index(sourceFile);
} catch (IOException | IllegalArgumentException | FatalIndexerException e) {
} catch (IOException | IllegalArgumentException | FatalIndexerException | SolrServerException e) {
logger.error("Error indexing file {}. Reason: {}", sourceFile, e.getMessage());
} finally {
this.currentIndexer = null;
Expand Down
17 changes: 15 additions & 2 deletions goobi-viewer-indexer/src/main/resources/config_indexer.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3420,7 +3420,20 @@
</list>
</MD_PI_ARK>



<!-- Field MD_USEANDREPRODUCTIONLICENSE: use and reproduction license of a record.
     Candidate sources are the three XPath expressions listed below: a MODS accessCondition of type
     "use and reproduction", a dc:rights element whose text contains 'http', and an English MODS
     accessCondition of type 'work-license'.
     NOTE(review): how multiple matching expressions are combined, and the exact effect of addToDefault and
     addUntokenizedVersion, is decided by the indexer's configuration handling and should be confirmed there. -->
<MD_USEANDREPRODUCTIONLICENSE>
<list>
<item>
<xpath>
<list>
<item>mets:xmlData/mods:mods/mods:accessCondition[@type="use and reproduction"]</item>
<item>dc:rights[contains(text(),'http')]</item>
<item>mets:xmlData/mods:mods/mods:accessCondition[@lang="eng"][@type='work-license']</item>
</list>
</xpath>
<addToDefault>true</addToDefault>
<addUntokenizedVersion>false</addUntokenizedVersion>
</item>
</list>
</MD_USEANDREPRODUCTIONLICENSE>
</fields>
</root>

0 comments on commit ff02e2a

Please sign in to comment.