Skip to content

Commit

Permalink
test(wired_table_rec): Add issue #13 unit testing
Browse files Browse the repository at this point in the history
  • Loading branch information
SWHL committed Jun 14, 2024
1 parent 8f55dfe commit 21f8ac9
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/wired_table_rec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Unit testings
run: |
pip install -r requirements.txt
pip install pytest
pip install pytest beautifulsoup4
wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/wired_table_rec_models.zip
unzip wired_table_rec_models.zip
Expand Down
4 changes: 2 additions & 2 deletions demo_wired.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# @Author: SWHL
# @Contact: liekkaskono@163.com
from pathlib import Path
from wired_table_rec import WiredTableRecognition

from wired_table_rec import WiredTableRecognition

table_rec = WiredTableRecognition()

img_path = "tests/test_files/wired/row_span.png"
img_path = "tests/test_files/wired/squeeze_error.jpeg"
table_str, elapse = table_rec(img_path)
print(table_str)
print(elapse)
Expand Down
Binary file added tests/test_files/wired/squeeze_error.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 20 additions & 20 deletions tests/test_wired.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path

import pytest
from bs4 import BeautifulSoup
from rapidocr_onnxruntime import RapidOCR

cur_dir = Path(__file__).resolve().parent
Expand All @@ -20,36 +21,35 @@
ocr_engine = RapidOCR()


@pytest.mark.parametrize(
"img_path, gt1, gt2",
[
("table_recognition.jpg", 1245, "d colsp"),
("table2.jpg", 924, "td><td "),
("row_span.png", 312, "></td><"),
],
)
def test_input_normal(img_path, gt1, gt2):
img_path = test_file_dir / img_path
def get_td_nums(html: str) -> int:
    """Count the ``<td>`` cells inside the first ``<table>`` of *html*.

    Args:
        html: HTML markup, e.g. the table string returned by the recognizer.

    Returns:
        Number of ``<td>`` tags found in the first ``<table>`` element, or
        ``0`` when the markup contains no ``<table>`` at all.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.table
    # ``soup.table`` is None when no <table> tag was parsed (for example an
    # empty string); report zero cells so the calling assertion fails with a
    # readable count mismatch instead of an AttributeError.
    if table is None:
        return 0
    return len(table.find_all("td"))


def test_squeeze_bug():
img_path = test_file_dir / "squeeze_error.jpeg"
ocr_result, _ = ocr_engine(img_path)
table_str, _ = table_recog(str(img_path), ocr_result)

assert len(table_str) >= gt1
assert table_str[-53:-46] == gt2
td_nums = get_td_nums(table_str)
assert td_nums == 153


@pytest.mark.parametrize(
"img_path, gt1, gt2",
"img_path, gt_td_nums, gt2",
[
("table_recognition.jpg", 1245, "d colsp"),
("table2.jpg", 924, "td><td "),
("row_span.png", 311, "></td><"),
("table_recognition.jpg", 35, "d colsp"),
("table2.jpg", 22, "td><td "),
("row_span.png", 17, "></td><"),
],
)
def test_input_without_ocr(img_path, gt1, gt2):
def test_input_normal(img_path, gt_td_nums, gt2):
img_path = test_file_dir / img_path

table_str, _ = table_recog(str(img_path))
ocr_result, _ = ocr_engine(img_path)
table_str, _ = table_recog(str(img_path), ocr_result)
td_nums = get_td_nums(table_str)

assert len(table_str) >= gt1
assert td_nums == gt_td_nums
assert table_str[-53:-46] == gt2

3 changes: 1 addition & 2 deletions wired_table_rec/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ def __call__(
except Exception:
logging.warning(traceback.format_exc())
return "", 0.0
else:
return table_str, elapse
return table_str, elapse


def main():
Expand Down

0 comments on commit 21f8ac9

Please sign in to comment.