Skip to content

Commit

Permalink
fix: Enable loading XLSX with blank rows
Browse files Browse the repository at this point in the history
It is now possible to load XLSX files with multiple sheets that may contain blank sheets or rows, as source data or schema.
Merged cells now give the top-left value to every cell in the merged area.
Wizard: combine the last 2 steps into one single step.

ING-4075
  • Loading branch information
emanuelaepure10 committed Nov 14, 2023
1 parent 7e35a00 commit a3f4614
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ public boolean isValid() {
*/
private void showMessage() {
if (header == null || header.length == 0)
setErrorMessage("The file contains no data");
setErrorMessage("The file contains no data or not valid data");
else if (!sfe.isValid())
setErrorMessage("Please enter a valid Type Name");
else if (!isValid)
Expand Down
7 changes: 0 additions & 7 deletions io/plugins/eu.esdihumboldt.hale.io.xls.ui/plugin.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@
ref="eu.esdihumboldt.hale.io.xls.reader.schema">
</provider>
</configPage>
<configPage
class="eu.esdihumboldt.hale.io.csv.ui.TypeSelectionPage"
order="0">
<provider
ref="eu.esdihumboldt.hale.io.xls.reader.instance">
</provider>
</configPage>
<configPage
class="eu.esdihumboldt.hale.io.xls.ui.XLSInstanceExportConfigurationPage"
order="0">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@
import eu.esdihumboldt.hale.io.xls.reader.ReaderSettings;
import eu.esdihumboldt.hale.ui.io.config.AbstractConfigurationPage;
import eu.esdihumboldt.hale.ui.io.instance.InstanceImportWizard;
import eu.esdihumboldt.hale.io.csv.ui.TypeSelectionPage;

/**
* Configuration page for the instance import provider of Excel files
*
* @author Patrick Lieb
*/
public class XLSInstanceImportConfigurationPage
extends AbstractConfigurationPage<InstanceReader, InstanceImportWizard> {

public class XLSInstanceImportConfigurationPage extends TypeSelectionPage {

private static final ALogger log = ALoggerFactory
.getLogger(XLSInstanceImportConfigurationPage.class);
Expand All @@ -52,9 +53,8 @@ public class XLSInstanceImportConfigurationPage
* Default Constructor
*/
public XLSInstanceImportConfigurationPage() {
super("xls.instance.import.sheet.selection");
setTitle("Sheet selection");
setDescription("Select sheet to import instances");
setDescription("Select sheet to import instances, your Type and Data reading setting");
}

/**
Expand All @@ -63,21 +63,16 @@ public XLSInstanceImportConfigurationPage() {
@Override
protected void createContent(Composite page) {

page.setLayout(new GridLayout(1, false));

Composite menu = new Composite(page, SWT.NONE);
menu.setLayout(new GridLayout(2, false));

GridDataFactory.fillDefaults().grab(true, false).applyTo(menu);
page.setLayout(new GridLayout(2, false));

Label sheetLabel = new Label(menu, SWT.None);
Label sheetLabel = new Label(page, SWT.None);
sheetLabel.setText("Select sheet");

sheetSelection = new Combo(menu, SWT.DROP_DOWN | SWT.READ_ONLY);
sheetSelection = new Combo(page, SWT.DROP_DOWN | SWT.READ_ONLY);
GridDataFactory.swtDefaults().align(SWT.FILL, SWT.CENTER).grab(true, false)
.applyTo(sheetSelection);

setPageComplete(false);
super.createContent(page);
}

/**
Expand Down Expand Up @@ -108,7 +103,7 @@ protected void onShowPage(boolean firstShow) {
}
super.onShowPage(firstShow);
sheetSelection.select(0);
setPageComplete(true);
setPageComplete(false);
}

/**
Expand All @@ -118,7 +113,8 @@ protected void onShowPage(boolean firstShow) {
public boolean updateConfiguration(InstanceReader provider) {
provider.setParameter(InstanceTableIOConstants.SHEET_INDEX,
Value.of(sheetSelection.getSelectionIndex()));
return true;

return super.updateConfiguration(provider);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public void widgetSelected(SelectionEvent event) {
} catch (Exception e) {
setPageComplete(false);
clearSuperPage();
setErrorMessage("The sheet is empty!");
setErrorMessage("The sheet is empty or the header is not valid!");
}

}
Expand All @@ -123,7 +123,7 @@ public void widgetDefaultSelected(SelectionEvent e) {
} catch (Exception e1) {
setPageComplete(false);
clearSuperPage();
setErrorMessage("The sheet is empty!");
setErrorMessage("The sheet is empty or the header is not valid!");
}
}

Expand Down Expand Up @@ -166,11 +166,12 @@ protected void onShowPage(boolean firstShow) {
}
ArrayList<String> items = new ArrayList<String>();
for (int i = 0; i < numberOfSheets; i++) {
items.add(wb.getSheetAt(i).getSheetName());
// only add items if there is a header (no empty sheet)
Row row = wb.getSheetAt(i).getRow(0);
if (row == null && newLocation != null && !newLocation.equals(oldLocation)) {
sheetNum++;
items.add(wb.getSheetAt(i).getSheetName());
if (row != null) {
update(i);
sheetNum = i;
}
}

Expand All @@ -190,20 +191,25 @@ protected void onShowPage(boolean firstShow) {

} catch (OldExcelFormatException e) {
// the setup is not in a valid state
clearPage();
clearSuperPage();
setErrorMessage(
"Old excel format detected (format 5.0/7.0 (BIFF5)). Please convert the excel file to BIFF8 from Excel versions 97/2000/XP/2003.");
setPageComplete(false);
clearFromException();
} catch (Exception e) {
log.error("Error loading Excel file", e);
clearPage();
clearSuperPage();
setErrorMessage("Excel file cannot be loaded!");
setPageComplete(false);
clearFromException();
}
}

/**
 * Resets the page after the Excel file could not be loaded: calls
 * {@code clearPage()} and {@code clearSuperPage()} and then marks the page
 * as not complete.
 * <p>
 * NOTE(review): the catch blocks in {@code onShowPage} set the error
 * message before calling this helper - confirm that neither
 * {@code clearPage()} nor {@code clearSuperPage()} resets that message.
 */
private void clearFromException() {
clearPage();
clearSuperPage();
setPageComplete(false);
}

/**
* Use this if an error occurs
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Import-Package: com.orientechnologies.orient.core.db.record;version="1.5.1";reso
org.apache.poi.openxml4j.opc;version="5.2.3",
org.apache.poi.poifs.filesystem;version="5.2.3",
org.apache.poi.ss.usermodel;version="5.2.3",
org.apache.poi.ss.util;version="5.2.3",
org.apache.poi.xssf.usermodel;version="5.2.3",
org.springframework.core.convert;version="5.2.0",
org.springframework.core.convert.support;version="5.2.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,19 @@ public static Workbook loadWorkbook(InputStream input, URI location, boolean xls
*/
protected void analyseHeader(Sheet sheet) {
Row header = sheet.getRow(0);

// identify columns
for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) {
Cell cell = header.getCell(i);
String text = extractText(cell);

headerCell(i, text);
if (header != null) {

// identify columns
int count = 0;
for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) {
Cell cell = header.getCell(i);
String text = extractText(cell, sheet);
// cell cannot be empty to extract the text
if (text != null) {
headerCell(count, text);
count++;
}
}
}
}

Expand All @@ -138,7 +144,9 @@ private void analyseContent(Sheet sheet) {
// for each row starting from the second
for (int i = 1; i <= sheet.getLastRowNum(); i++) {
Row row = sheet.getRow(i);
analyseRow(i, row);
if (row != null) {
analyseRow(i, row, sheet);
}
}
}

Expand All @@ -148,8 +156,9 @@ private void analyseContent(Sheet sheet) {
* @param num the row number (starting from one as the header row is handled
* separately)
* @param row the table row
* @param sheet the sheet
*/
protected abstract void analyseRow(int num, Row row);
protected abstract void analyseRow(int num, Row row, Sheet sheet);

/**
* Extract the text from a given cell. Formulas are evaluated, for blank or
Expand All @@ -158,8 +167,8 @@ private void analyseContent(Sheet sheet) {
* @param cell the cell
* @return the cell text
*/
protected String extractText(Cell cell) {
return XLSUtil.extractText(cell, evaluator);
protected String extractText(Cell cell, Sheet sheet) {
return XLSUtil.extractText(cell, evaluator, sheet);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Map;

import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;

import eu.esdihumboldt.hale.common.core.io.supplier.LocatableInputSupplier;

Expand Down Expand Up @@ -60,8 +61,9 @@ public AnalyseXLSSchemaTable(LocatableInputSupplier<? extends InputStream> sourc
*/
@Override
protected void headerCell(int num, String text) {
if (num == header.size())
if (num == header.size()) {
header.add(text);
}
header.set(num, text);
}

Expand All @@ -70,12 +72,14 @@ protected void headerCell(int num, String text) {
* org.apache.poi.ss.usermodel.Row)
*/
@Override
protected void analyseRow(int num, Row row) {
protected void analyseRow(int num, Row row, Sheet sheet) {
List<String> rowContent = new ArrayList<String>();
for (int i = 0; i < row.getLastCellNum(); i++) {
rowContent.add(extractText(row.getCell(i)));
rowContent.add(extractText(row.getCell(i), sheet));
}
if (!rowContent.isEmpty() && rowContent.stream().anyMatch(text -> text != null)) {
rows.put(num, rowContent);
}
rows.put(num, rowContent);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.util.CellRangeAddress;

/**
* General utilities when working with Excel files.
Expand All @@ -35,10 +38,19 @@ public class XLSUtil {
* @param evaluator the formula evaluator
* @return the cell text
*/
public static String extractText(Cell cell, FormulaEvaluator evaluator) {
public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet) {
if (cell == null)
return null;

if (isCellPartOfMergedRegion(cell, sheet)) {
// Get the merged region
CellRangeAddress mergedRegion = getMergedRegion(cell, sheet);

// Get the first cell of the merged region (top-left cell)
Row mergedRow = sheet.getRow(mergedRegion.getFirstRow());
cell = mergedRow.getCell(mergedRegion.getFirstColumn());
}

if (cell.getCellType() == CellType.BLANK) {
// do this check here as the evaluator seems to return null on a
// blank
Expand Down Expand Up @@ -73,4 +85,24 @@ else if (CellType.STRING.equals(value.getCellType())) {
}
}

/**
 * Determines whether a cell lies inside any merged region of the sheet.
 *
 * @param cell the cell to test
 * @param sheet the sheet whose merged regions are inspected
 * @return <code>true</code> if a merged region of the sheet contains the
 *         cell, <code>false</code> otherwise
 */
private static boolean isCellPartOfMergedRegion(Cell cell, Sheet sheet) {
    // Delegate to the region lookup so the scan over the merged regions is
    // implemented in exactly one place.
    return getMergedRegion(cell, sheet) != null;
}

/**
 * Looks up the merged region that contains the given cell.
 *
 * @param cell the cell to locate
 * @param sheet the sheet whose merged regions are searched
 * @return the first merged region (by region index) that contains the cell,
 *         or <code>null</code> if no merged region contains it
 */
private static CellRangeAddress getMergedRegion(Cell cell, Sheet sheet) {
    int index = 0;
    while (index < sheet.getNumMergedRegions()) {
        CellRangeAddress candidate = sheet.getMergedRegion(index);
        boolean containsCell = candidate.isInRange(cell.getRowIndex(), cell.getColumnIndex());
        if (containsCell) {
            return candidate;
        }
        index++;
    }
    return null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ public Map<Value, Value> read(Workbook workbook, boolean skipFirst, int keyColum
for (; row < sheet.getPhysicalNumberOfRows(); row++) {
Row currentRow = sheet.getRow(row);
if (currentRow != null) {
String value = XLSUtil.extractText(currentRow.getCell(valueColumn), evaluator);
String value = XLSUtil.extractText(currentRow.getCell(valueColumn), evaluator,
sheet);
if (value != null && (!ignoreEmptyStrings || !value.isEmpty())) {
map.put(Value.of(XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator)),
map.put(Value.of(
XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator, sheet)),
Value.of(value));
}
}
Expand Down

0 comments on commit a3f4614

Please sign in to comment.