Skip to content

Commit

Permalink
[#228] Skeleton code to implement unified tabular processing
Browse files Browse the repository at this point in the history
Not complete -- tests are failing here.
  • Loading branch information
blcham committed Nov 10, 2023
1 parent 54dfb12 commit 1a29167
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import cz.cvut.spipes.modules.exception.SheetIsNotSpecifiedException;
import cz.cvut.spipes.modules.exception.SpecificationNonComplianceException;
import cz.cvut.spipes.modules.model.*;
import cz.cvut.spipes.modules.tabular.CSVReader;
import cz.cvut.spipes.modules.tabular.TabularReader;
import cz.cvut.spipes.modules.util.*;
import cz.cvut.spipes.registry.StreamResource;
import cz.cvut.spipes.registry.StreamResourceRegistry;
Expand Down Expand Up @@ -250,12 +252,14 @@ ExecutionContext executeSelf() {
try {
ICsvListReader listReader = getCsvListReader(csvPreference);

if (listReader == null) {
if (listReader == null) { // TODO we need to detect this situation without need to create list reader
logMissingQuoteError();
return getExecutionContext(inputModel, outputModel);
}

String[] header = listReader.getHeader(true); // skip the header (can't be used with CsvListReader)
TabularReader tabularReader = new CSVReader(listReader);

List<String> header = tabularReader.getHeader();

if (header == null) {
LOG.warn("Input stream resource {} to provide tabular data is empty.", this.sourceResource.getUri());
Expand All @@ -276,7 +280,7 @@ ExecutionContext executeSelf() {
em.close();
em.getEntityManagerFactory().close();

outputColumns = new ArrayList<>(header.length);
outputColumns = new ArrayList<>(header.size());

for (String columnTitle : header) {
String columnName = normalize(columnTitle);
Expand Down Expand Up @@ -321,10 +325,10 @@ ExecutionContext executeSelf() {
// an initial subject, the non-core annotation as property, and the
// value of the non-core annotation as value.

for (int i = 0; i < header.length; i++) {
for (int i = 0; i < header.size(); i++) {
// 4.6.8.1
Column column = outputColumns.get(i);
String cellValue = getValueFromRow(row, i, header.length, rowNumber);
String cellValue = getValueFromRow(row, i, header.size(), rowNumber);
if (cellValue != null) rowStatements.add(createRowResource(cellValue, rowNumber, column));
// 4.6.8.2
r.setDescribes(tableSchema.createAboutUrl(rowNumber));
Expand Down Expand Up @@ -678,7 +682,8 @@ public void processTableAtIndex(int sheetNumber) {
this.processTableAtIndex = sheetNumber;
}

private String[] getHeaderFromSchema(Model inputModel, String[] header, boolean hasInputSchema) {
private List<String> getHeaderFromSchema(Model inputModel, final List<String> header, boolean hasInputSchema) {
List<String> headerToReturn = null;
if (hasInputSchema) {
List<String> orderList = new ArrayList<>();
Resource tableSchemaResource = inputModel.getResource(tableSchema.getUri().toString());
Expand All @@ -690,22 +695,22 @@ private String[] getHeaderFromSchema(Model inputModel, String[] header, boolean

rdfList.iterator().forEach(rdfNode -> orderList.add(String.valueOf(rdfNode)));
tableSchema.setOrderList(orderList);
header = createHeaders(header.length, tableSchema.sortColumns(orderList));
headerToReturn = createHeader(header.size(), tableSchema.sortColumns(orderList));

} else LOG.info("Order of columns was not provided in the schema.");
} else {
header = createHeaders(header.length, new ArrayList<>());
headerToReturn = createHeader(header.size(), new ArrayList<>());
}
return header;
return headerToReturn;
}

private String[] createHeaders(int size, List<Column> columns) {
String[] headers = new String[size];
/**
 * Builds a header of {@code size} column names.
 *
 * @param size    number of header entries to produce
 * @param columns columns supplying the names; when empty, the names
 *                {@code column_1 .. column_size} are generated instead
 * @return list of {@code size} header names
 */
private List<String> createHeader(int size, List<Column> columns) {
List<String> headers = new ArrayList<>(size);

for (int i = 0; i < size; i++) {
if (!columns.isEmpty()) {
headers[i] = columns.get(i).getName();
} else headers[i] = "column_" + (i + 1);
// new ArrayList<>(size) only reserves capacity and the list is still empty, so
// set(i, ...) would throw IndexOutOfBoundsException; append in index order instead.
headers.add(columns.get(i).getName());
} else headers.add("column_" + (i + 1));
}
return headers;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package cz.cvut.spipes.modules.tabular;

import org.supercsv.io.ICsvListReader;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
 * {@link TabularReader} implementation that reads the header through a
 * SuperCSV {@link ICsvListReader}.
 */
public class CSVReader implements TabularReader {

    ICsvListReader listReader;

    /**
     * @param listReader reader the header is obtained from
     */
    public CSVReader(ICsvListReader listReader) {
        this.listReader = listReader;
    }

    /**
     * Reads the header row from the wrapped list reader.
     *
     * @return the header cells as a fixed-size list, or {@code null} when the
     *         wrapped reader returns no header (e.g. the input is empty)
     * @throws IOException if the wrapped reader fails to read
     */
    @Override
    public List<String> getHeader() throws IOException {
        String[] header = listReader.getHeader(true); // skip the header (can't be used with CsvListReader);
        // Arrays.asList rejects a null array with a NullPointerException, so pass the
        // "no header" result through as null for callers that check for it.
        if (header == null) {
            return null;
        }
        return Arrays.asList(header);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package cz.cvut.spipes.modules.tabular;

/**
 * Empty placeholder class: it declares no members and does not implement
 * {@code TabularReader}.
 */
public class ExcelReader {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package cz.cvut.spipes.modules.tabular;

/**
 * Empty placeholder class: it declares no members and does not implement
 * {@code TabularReader}.
 */
public class HtmlReader {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package cz.cvut.spipes.modules.tabular;

import java.io.IOException;
import java.util.List;

/**
 * Reader abstraction over a tabular input; currently exposes only the header.
 */
public interface TabularReader {
/**
 * Returns the header of the tabular input as a list of column titles.
 *
 * NOTE(review): the calling module compares the result against {@code null} to
 * detect an empty input, so implementations presumably return {@code null} in
 * that case; confirm and make the contract explicit.
 *
 * @return the header cells
 * @throws IOException if reading the underlying input fails
 */
List<String> getHeader() throws IOException;
}

0 comments on commit 1a29167

Please sign in to comment.