Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Enable loading XLSX with blank rows and sheets for schema and source data #1065

Merged
merged 1 commit into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ public boolean isValid() {
*/
private void showMessage() {
if (header == null || header.length == 0)
setErrorMessage("The file contains no data");
setErrorMessage("The file contains no data or not valid data");
else if (!sfe.isValid())
setErrorMessage("Please enter a valid Type Name");
else if (!isValid)
Expand Down
7 changes: 0 additions & 7 deletions io/plugins/eu.esdihumboldt.hale.io.xls.ui/plugin.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@
ref="eu.esdihumboldt.hale.io.xls.reader.schema">
</provider>
</configPage>
<configPage
class="eu.esdihumboldt.hale.io.csv.ui.TypeSelectionPage"
order="0">
<provider
ref="eu.esdihumboldt.hale.io.xls.reader.instance">
</provider>
</configPage>
<configPage
class="eu.esdihumboldt.hale.io.xls.ui.XLSInstanceExportConfigurationPage"
order="0">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@
import eu.esdihumboldt.hale.io.xls.reader.ReaderSettings;
import eu.esdihumboldt.hale.ui.io.config.AbstractConfigurationPage;
import eu.esdihumboldt.hale.ui.io.instance.InstanceImportWizard;
import eu.esdihumboldt.hale.io.csv.ui.TypeSelectionPage;

/**
* Configuration page for the instance import provider of Excel files
*
* @author Patrick Lieb
*/
public class XLSInstanceImportConfigurationPage
extends AbstractConfigurationPage<InstanceReader, InstanceImportWizard> {

public class XLSInstanceImportConfigurationPage extends TypeSelectionPage {

private static final ALogger log = ALoggerFactory
.getLogger(XLSInstanceImportConfigurationPage.class);
Expand All @@ -52,9 +53,8 @@ public class XLSInstanceImportConfigurationPage
* Default Constructor
*/
public XLSInstanceImportConfigurationPage() {
super("xls.instance.import.sheet.selection");
setTitle("Sheet selection");
setDescription("Select sheet to import instances");
setDescription("Select sheet to import instances, your Type and Data reading setting");
}

/**
Expand All @@ -63,21 +63,16 @@ public XLSInstanceImportConfigurationPage() {
@Override
protected void createContent(Composite page) {

page.setLayout(new GridLayout(1, false));

Composite menu = new Composite(page, SWT.NONE);
menu.setLayout(new GridLayout(2, false));

GridDataFactory.fillDefaults().grab(true, false).applyTo(menu);
page.setLayout(new GridLayout(2, false));

Label sheetLabel = new Label(menu, SWT.None);
Label sheetLabel = new Label(page, SWT.None);
sheetLabel.setText("Select sheet");

sheetSelection = new Combo(menu, SWT.DROP_DOWN | SWT.READ_ONLY);
sheetSelection = new Combo(page, SWT.DROP_DOWN | SWT.READ_ONLY);
GridDataFactory.swtDefaults().align(SWT.FILL, SWT.CENTER).grab(true, false)
.applyTo(sheetSelection);

setPageComplete(false);
super.createContent(page);
}

/**
Expand Down Expand Up @@ -108,7 +103,7 @@ protected void onShowPage(boolean firstShow) {
}
super.onShowPage(firstShow);
sheetSelection.select(0);
setPageComplete(true);
setPageComplete(false);
}

/**
Expand All @@ -118,7 +113,8 @@ protected void onShowPage(boolean firstShow) {
public boolean updateConfiguration(InstanceReader provider) {
provider.setParameter(InstanceTableIOConstants.SHEET_INDEX,
Value.of(sheetSelection.getSelectionIndex()));
return true;

return super.updateConfiguration(provider);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public void widgetSelected(SelectionEvent event) {
} catch (Exception e) {
setPageComplete(false);
clearSuperPage();
setErrorMessage("The sheet is empty!");
setErrorMessage("The sheet is empty or the header is not valid!");
}

}
Expand All @@ -123,7 +123,7 @@ public void widgetDefaultSelected(SelectionEvent e) {
} catch (Exception e1) {
setPageComplete(false);
clearSuperPage();
setErrorMessage("The sheet is empty!");
setErrorMessage("The sheet is empty or the header is not valid!");
}
}

Expand Down Expand Up @@ -166,11 +166,12 @@ protected void onShowPage(boolean firstShow) {
}
ArrayList<String> items = new ArrayList<String>();
for (int i = 0; i < numberOfSheets; i++) {
items.add(wb.getSheetAt(i).getSheetName());
// only add items if there is a header (no empty sheet)
Row row = wb.getSheetAt(i).getRow(0);
if (row == null && newLocation != null && !newLocation.equals(oldLocation)) {
sheetNum++;
items.add(wb.getSheetAt(i).getSheetName());
if (row != null) {
update(i);
sheetNum = i;
}
}

Expand All @@ -190,20 +191,25 @@ protected void onShowPage(boolean firstShow) {

} catch (OldExcelFormatException e) {
// the setup is not in a valid state
clearPage();
clearSuperPage();
setErrorMessage(
"Old excel format detected (format 5.0/7.0 (BIFF5)). Please convert the excel file to BIFF8 from Excel versions 97/2000/XP/2003.");
setPageComplete(false);
clearFromException();
} catch (Exception e) {
log.error("Error loading Excel file", e);
clearPage();
clearSuperPage();
setErrorMessage("Excel file cannot be loaded!");
setPageComplete(false);
clearFromException();
}
}

/**
* Shared clean-up for the exception handlers of this page.
* <p>
* Calls {@code clearPage()} and {@code clearSuperPage()} and afterwards
* marks the page as not complete, so the wizard cannot proceed.
* <p>
* NOTE(review): the catch blocks invoke this after
* {@code setErrorMessage(...)}; presumably the clear calls leave the error
* message untouched - confirm against their implementations.
*/
private void clearFromException() {
clearPage();
clearSuperPage();
setPageComplete(false);
}

/**
* Use this if an error occurs
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Import-Package: com.orientechnologies.orient.core.db.record;version="1.5.1";reso
org.apache.poi.openxml4j.opc;version="5.2.3",
org.apache.poi.poifs.filesystem;version="5.2.3",
org.apache.poi.ss.usermodel;version="5.2.3",
org.apache.poi.ss.util;version="5.2.3",
org.apache.poi.xssf.usermodel;version="5.2.3",
org.springframework.core.convert;version="5.2.0",
org.springframework.core.convert.support;version="5.2.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,19 @@ public static Workbook loadWorkbook(InputStream input, URI location, boolean xls
*/
protected void analyseHeader(Sheet sheet) {
Row header = sheet.getRow(0);

// identify columns
for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) {
Cell cell = header.getCell(i);
String text = extractText(cell);

headerCell(i, text);
if (header != null) {

// identify columns
int count = 0;
for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) {
Cell cell = header.getCell(i);
String text = extractText(cell, sheet);
// cell cannot be empty to extract the text
if (text != null) {
headerCell(count, text);
count++;
}
}
}
}

Expand All @@ -138,7 +144,9 @@ private void analyseContent(Sheet sheet) {
// for each row starting from the second
for (int i = 1; i <= sheet.getLastRowNum(); i++) {
Row row = sheet.getRow(i);
analyseRow(i, row);
if (row != null) {
analyseRow(i, row, sheet);
}
}
}

Expand All @@ -148,8 +156,9 @@ private void analyseContent(Sheet sheet) {
* @param num the row number (starting from one as the header row is handled
* separately)
* @param row the table row
* @param sheet the sheet
*/
protected abstract void analyseRow(int num, Row row);
protected abstract void analyseRow(int num, Row row, Sheet sheet);

/**
* Extract the text from a given cell. Formulas are evaluated, for blank or
Expand All @@ -158,8 +167,8 @@ private void analyseContent(Sheet sheet) {
* @param cell the cell
* @return the cell text
*/
protected String extractText(Cell cell) {
return XLSUtil.extractText(cell, evaluator);
protected String extractText(Cell cell, Sheet sheet) {
return XLSUtil.extractText(cell, evaluator, sheet);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Map;

import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;

import eu.esdihumboldt.hale.common.core.io.supplier.LocatableInputSupplier;

Expand Down Expand Up @@ -60,8 +61,9 @@ public AnalyseXLSSchemaTable(LocatableInputSupplier<? extends InputStream> sourc
*/
@Override
protected void headerCell(int num, String text) {
if (num == header.size())
emanuelaepure10 marked this conversation as resolved.
Show resolved Hide resolved
if (num == header.size()) {
header.add(text);
}
header.set(num, text);
}

Expand All @@ -70,12 +72,14 @@ protected void headerCell(int num, String text) {
* org.apache.poi.ss.usermodel.Row)
*/
@Override
protected void analyseRow(int num, Row row) {
protected void analyseRow(int num, Row row, Sheet sheet) {
List<String> rowContent = new ArrayList<String>();
for (int i = 0; i < row.getLastCellNum(); i++) {
rowContent.add(extractText(row.getCell(i)));
rowContent.add(extractText(row.getCell(i), sheet));
}
if (!rowContent.isEmpty() && rowContent.stream().anyMatch(text -> text != null)) {
rows.put(num, rowContent);
}
rows.put(num, rowContent);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.util.CellRangeAddress;

/**
* General utilities when working with Excel files.
Expand All @@ -35,10 +38,19 @@ public class XLSUtil {
* @param evaluator the formula evaluator
* @return the cell text
*/
public static String extractText(Cell cell, FormulaEvaluator evaluator) {
public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet) {
if (cell == null)
return null;

if (isCellPartOfMergedRegion(cell, sheet)) {
// Get the merged region
CellRangeAddress mergedRegion = getMergedRegion(cell, sheet);

// Get the first cell of the merged region (top-left cell)
Row mergedRow = sheet.getRow(mergedRegion.getFirstRow());
cell = mergedRow.getCell(mergedRegion.getFirstColumn());
}

if (cell.getCellType() == CellType.BLANK) {
// do this check here as the evaluator seems to return null on a
// blank
Expand Down Expand Up @@ -73,4 +85,24 @@ else if (CellType.STRING.equals(value.getCellType())) {
}
}

/**
 * Tests whether the given cell lies inside any merged region of the sheet.
 *
 * @param cell the cell whose row and column index are looked up
 * @param sheet the sheet providing the merged regions
 * @return <code>true</code> if at least one merged region of the sheet
 *         contains the cell's position, <code>false</code> otherwise
 */
private static boolean isCellPartOfMergedRegion(Cell cell, Sheet sheet) {
	// the cell position is only queried per region, exactly like the
	// index based loop did - a sheet without merged regions never touches
	// the cell
	return sheet.getMergedRegions().stream().anyMatch(
			candidate -> candidate.isInRange(cell.getRowIndex(), cell.getColumnIndex()));
}

/**
 * Looks up the merged region of the sheet that contains the given cell.
 *
 * @param cell the cell whose row and column index are looked up
 * @param sheet the sheet providing the merged regions
 * @return the first merged region (in the sheet's region order) that
 *         contains the cell's position, or <code>null</code> if the cell
 *         is not part of any merged region
 */
private static CellRangeAddress getMergedRegion(Cell cell, Sheet sheet) {
	return sheet.getMergedRegions().stream()
			.filter(candidate -> candidate.isInRange(cell.getRowIndex(),
					cell.getColumnIndex()))
			.findFirst() // keep the "first match wins" order of the index loop
			.orElse(null);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ public Map<Value, Value> read(Workbook workbook, boolean skipFirst, int keyColum
for (; row < sheet.getPhysicalNumberOfRows(); row++) {
Row currentRow = sheet.getRow(row);
if (currentRow != null) {
String value = XLSUtil.extractText(currentRow.getCell(valueColumn), evaluator);
String value = XLSUtil.extractText(currentRow.getCell(valueColumn), evaluator,
sheet);
if (value != null && (!ignoreEmptyStrings || !value.isEmpty())) {
map.put(Value.of(XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator)),
map.put(Value.of(
XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator, sheet)),
Value.of(value));
}
}
Expand Down