From 60e5df5bb9bc7752185c0b1e1b612ba86fa489d7 Mon Sep 17 00:00:00 2001 From: Joker1212 <519548295@qq.com> Date: Mon, 30 Sep 2024 15:27:30 +0800 Subject: [PATCH 1/3] fix: use gray img for table cls --- table_cls/main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/table_cls/main.py b/table_cls/main.py index 9209e2a..9ad7820 100644 --- a/table_cls/main.py +++ b/table_cls/main.py @@ -1,6 +1,7 @@ import time from pathlib import Path +import cv2 import numpy as np import onnxruntime from PIL import Image @@ -39,8 +40,10 @@ def _preprocess(self, image): def __call__(self, content: InputType): ss = time.perf_counter() img = self.load_img(content) - img = self._preprocess(img) - output = self.table_cls.run(None, {"input": img}) + gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + gray_img = np.stack((gray_img,) * 3, axis=-1) + gray_img = self._preprocess(gray_img) + output = self.table_cls.run(None, {"input": gray_img}) predict = np.exp(output[0] - np.max(output[0], axis=1, keepdims=True)) predict /= np.sum(predict, axis=1, keepdims=True) predict_cla = np.argmax(predict, axis=1)[0] From 89d6196994b990ad98b540ab601505de4381ec83 Mon Sep 17 00:00:00 2001 From: Joker1212 <519548295@qq.com> Date: Mon, 30 Sep 2024 15:28:50 +0800 Subject: [PATCH 2/3] chore: remove wired_table_v2 workflow --- .github/workflows/wired_table_rec.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/wired_table_rec.yml b/.github/workflows/wired_table_rec.yml index 1c5f5a2..fc65e1b 100644 --- a/.github/workflows/wired_table_rec.yml +++ b/.github/workflows/wired_table_rec.yml @@ -33,10 +33,6 @@ jobs: unzip wired_table_rec_models.zip mv wired_table_rec_models/*.onnx wired_table_rec/models/ - wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/wired_table_rec_modelsV2.zip - unzip wired_table_rec_modelsV2.zip - mv cycle_center_net_v2.onnx wired_table_rec/models/ - pytest tests/test_wired_table_rec.py GenerateWHL_PushPyPi: From 
a6c30a2e205a0cc6f89d0f55969d3c13029f8b22 Mon Sep 17 00:00:00 2001 From: Joker1212 <519548295@qq.com> Date: Mon, 30 Sep 2024 17:32:27 +0800 Subject: [PATCH 3/3] chore: add online demo --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9deafa6..b0ff3fb 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,9 @@ - **2024.9.26** - 修正RapidTable默认英文模型导致的测评结果错误。 - 补充测评数据集,补充开源社区更多模型的测评结果 - +- **2024.9.30** + - 优化表格类型判断,增加在线演示 + ### 简介 💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自paddle的表格识别模型, 阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。 @@ -29,6 +31,9 @@ 🛡️ **稳**: 不依赖任何第三方训练框架,只依赖必要基础库,避免包冲突 +### 在线演示 +[modelscope魔搭](https://www.modelscope.cn/studios/jockerK/TableRec) +[huggingface](https://huggingface.co/spaces/Joker1212/TableDetAndRec) ### 效果展示