Skip to content

Commit

Permalink
Update table creation
Browse files (browse the repository at this point in the history)
  • Loading branch information
xavctn committed Nov 22, 2023
1 parent d866faa commit edc9c80
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 6 deletions.
5 changes: 4 additions & 1 deletion src/img2table/document/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ def get_table_content(self, tables: Dict[int, List["Table"]], ocr: "OCRInstance"
# Reset OCR
self.ocr_df = None

- return {k: [tb.extracted_table for tb in v] for k, v in tables.items()}
+ return {k: [tb.extracted_table for tb in v
+ if (max(tb.nb_rows, tb.nb_columns) >= 2 and not tb._borderless)
+ or (tb.nb_rows >= 2 and tb.nb_columns >= 3)]
+ for k, v in tables.items()}

def extract_tables(self, ocr: "OCRInstance" = None, implicit_rows: bool = False, borderless_tables: bool = False,
min_confidence: int = 50) -> Dict[int, List[ExtractedTable]]:
Expand Down
3 changes: 2 additions & 1 deletion src/img2table/tables/objects/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@


class Table(TableObject):
- def __init__(self, rows: Union[Row, List[Row]]):
+ def __init__(self, rows: Union[Row, List[Row]], borderless: bool = False):
if rows is None:
self._items = []
elif isinstance(rows, Row):
self._items = [rows]
else:
self._items = rows
self._title = None
+ self._borderless = borderless

@property
def items(self) -> List[Row]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@ def remove_unwanted_elements(table: Table, elements: List[Cell]) -> Table:
return table


- def cluster_to_table(cluster_cells: List[Cell], elements: List[Cell]) -> Table:
+ def cluster_to_table(cluster_cells: List[Cell], elements: List[Cell], borderless: bool = False) -> Table:
"""
Convert a cell cluster to a Table object
:param cluster_cells: list of cells that form a table
:param elements: list of image elements
+ :param borderless: boolean indicating if the created table is borderless
:return: table with rows inferred from table cells
"""
# Get list of vertical delimiters
Expand Down Expand Up @@ -126,7 +127,7 @@ def cluster_to_table(cluster_cells: List[Cell], elements: List[Cell]) -> Table:
list_rows.append(Row(cells=list_cells))

# Create table
- table = Table(rows=list_rows)
+ table = Table(rows=list_rows, borderless=borderless)

# Remove empty/unnecessary rows and columns from the table, based on elements
processed_table = remove_unwanted_elements(table=table, elements=elements)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,6 @@ def get_table(columns: DelimiterGroup, table_rows: List[TableRow], contours: Lis
list_cells.append(cell)

# Create table object
- table = cluster_to_table(cluster_cells=list_cells, elements=contours)
+ table = cluster_to_table(cluster_cells=list_cells, elements=contours, borderless=True)

- return table if table.nb_columns >= 3 and table.nb_rows >= 3 else None
+ return table if table.nb_columns >= 3 and table.nb_rows >= 2 else None

0 comments on commit edc9c80

Please sign in to comment.