Skip to content

Commit

Permalink
feat: Support yolov8n publaynet and general layout model
Browse files Browse the repository at this point in the history
  • Loading branch information
SWHL committed Jun 29, 2024
1 parent 1f26070 commit a4a3440
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 48 deletions.
50 changes: 26 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
|`pp_layout_table`| 表格 | `layout_table.onnx` |`["table"]` |
| `pp_layout_publaynet`| 英文 | `layout_publaynet.onnx` |`["text", "title", "list", "table", "figure"]` |
| `pp_layout_cdla`| 中文 | `layout_cdla.onnx` | `['text', 'title', 'figure', 'figure_caption', 'table', 'table_caption', 'header', 'footer', 'reference', 'equation']` |
| `yolov8n_layout_paper`| 论文 | `yolov8n_layout_paper.onnx` | `['text', 'title', 'figure', 'figure_caption', 'table', 'table_caption', 'header', 'footer', 'reference', 'equation']` |
| `yolov8n_layout_report`| 研报 | `yolov8n_layout_report.onnx` | `['text', 'title', 'header', 'footer', 'figure', 'figure_caption', 'table', 'table_caption', 'toc']` |
| `yolov8n_layout_paper`| 论文 | `yolov8n_layout_paper.onnx` | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |
| `yolov8n_layout_report`| 研报 | `yolov8n_layout_report.onnx` | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |
| `yolov8n_layout_publaynet`| 英文 | `yolov8n_layout_publaynet.onnx` | `["Text", "Title", "List", "Table", "Figure"]` |
| `yolov8n_layout_general6`| 通用 | `yolov8n_layout_general6.onnx` | `["Text", "Title", "Figure", "Table", "Caption", "Equation"]` |

PP模型来源:[PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md)

Expand Down Expand Up @@ -58,28 +60,28 @@ if ploted_img is not None:
```

#### 终端运行
- 用法:
```bash
$ rapid_layout -h
usage: rapid_layout [-h] -img IMG_PATH [-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}]
[--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}]
[--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}] [--use_cuda] [--use_dml]
[-v]

options:
-h, --help show this help message and exit
-img IMG_PATH, --img_path IMG_PATH
Path to image for layout.
-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}, --model_type {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}
Support model type
--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}
Box threshold, the range is [0, 1]
--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}
IoU threshold, the range is [0, 1]
--use_cuda Whether to use cuda.
--use_dml Whether to use DirectML, which only works in Windows10+.
-v, --vis Whether to visualize the layout results.
```
```bash
$ rapid_layout -h
usage: rapid_layout [-h] -img IMG_PATH
[-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}]
[--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}]
[--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}]
[--use_cuda] [--use_dml] [-v]

options:
-h, --help show this help message and exit
-img IMG_PATH, --img_path IMG_PATH
Path to image for layout.
-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}, --model_type {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}
Support model type
--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}
Box threshold, the range is [0, 1]
--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}
IoU threshold, the range is [0, 1]
--use_cuda Whether to use cuda.
--use_dml Whether to use DirectML, which only works in Windows10+.
-v, --vis Whether to visualize the layout results.
```
- 示例:
```bash
$ rapid_layout -v -img test_images/layout.png
Expand Down
15 changes: 9 additions & 6 deletions rapid_layout/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
"pp_layout_table": f"{ROOT_URL}/layout_table.onnx",
"yolov8n_layout_paper": f"{ROOT_URL}/yolov8n_layout_paper.onnx",
"yolov8n_layout_report": f"{ROOT_URL}/yolov8n_layout_report.onnx",
"yolov8n_layout_publaynet": f"{ROOT_URL}/yolov8n_layout_publaynet.onnx",
"yolov8n_layout_general6": f"{ROOT_URL}/yolov8n_layout_general6.onnx",
}
DEFAULT_MODEL_PATH = str(ROOT_DIR / "models" / "layout_cdla.onnx")

Expand Down Expand Up @@ -72,12 +74,10 @@ def __init__(

self.load_img = LoadImage()

self.pp_layout_type = [
"pp_layout_cdla",
"pp_layout_publaynet",
"pp_layout_table",
self.pp_layout_type = [k for k in KEY_TO_MODEL_URL if k.startswith("pp")]
self.yolov8_layout_type = [
k for k in KEY_TO_MODEL_URL if k.startswith("yolov8n")
]
self.yolov8_layout_type = ["yolov8n_layout_paper", "yolov8n_layout_report"]

def __call__(
self, img_content: Union[str, np.ndarray, bytes, Path]
Expand All @@ -104,12 +104,15 @@ def pp_layout(self, img: np.ndarray, ori_img_shape: Tuple[int, int]):
return boxes, scores, class_names, elapse

def yolov8_layout(self, img: np.ndarray, ori_img_shape: Tuple[int, int]):
s_time = time.time()

input_tensor = self.yolov8_preprocess(img)
outputs = self.session(input_tensor)
boxes, scores, class_names = self.yolov8_postprocess(
outputs, ori_img_shape, self.yolov8_input_shape
)
return boxes, scores, class_names
elapse = time.time() - s_time
return boxes, scores, class_names, elapse

@staticmethod
def get_model_path(model_type: str, model_path: Union[str, Path, None]) -> str:
Expand Down
3 changes: 2 additions & 1 deletion rapid_layout/utils/load_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def __call__(self, img: InputType) -> np.ndarray:

origin_img_type = type(img)
img = self.load_img(img)
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
if img.ndim == 3:
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
img = self.convert_img(img, origin_img_type)
return img

Expand Down
Binary file added tests/test_files/PMC3576793_00004.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
44 changes: 27 additions & 17 deletions tests/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,14 @@
img = cv2.imread(str(img_path))


def test_iou_outside_thres():
    """Constructing ``RapidLayout`` with ``iou_thres=1.2`` must raise ``ValueError``."""
    with pytest.raises(ValueError) as exc:
        # The instance itself is never used; only construction is under test.
        RapidLayout(iou_thres=1.2)
    assert exc.type is ValueError


def test_conf_outside_thres():
    """Constructing ``RapidLayout`` with ``conf_thres=1.2`` must raise ``ValueError``."""
    with pytest.raises(ValueError) as exc:
        # The instance itself is never used; only construction is under test.
        RapidLayout(conf_thres=1.2)
    assert exc.type is ValueError


def test_empty():
    """Calling the engine with ``None`` as image content must raise ``LoadImageError``."""
    # Build the engine outside the ``raises`` block so that only the call
    # under test can satisfy the expected exception.
    engine = RapidLayout()
    with pytest.raises(LoadImageError) as exc:
        engine(None)
    assert exc.type is LoadImageError
@pytest.mark.parametrize(
    "model_type,gt",
    [
        ("yolov8n_layout_publaynet", 12),
        ("yolov8n_layout_general6", 13),
    ],
)
def test_yolov8n_layout(model_type, gt):
    """Check the number of boxes a YOLOv8n model returns for the sample page.

    ``gt`` is the expected box count for ``model_type`` on
    ``PMC3576793_00004.jpg``.
    """
    sample_img = test_file_dir / "PMC3576793_00004.jpg"
    layout_engine = RapidLayout(model_type=model_type)
    # Only the boxes are asserted on; the other outputs are unpacked to keep
    # the same arity requirement on the engine's return value.
    boxes, _scores, _class_names, *_elapse = layout_engine(sample_img)
    assert len(boxes) == gt


@pytest.mark.parametrize(
Expand All @@ -56,3 +47,22 @@ def test_yolov8_layout(img_content):
engine = RapidLayout(model_type="yolov8n_layout_paper")
boxes, scores, class_names, *elapse = engine(img_content)
assert len(boxes) == 11


def test_iou_outside_thres():
    """Constructing ``RapidLayout`` with ``iou_thres=1.2`` must raise ``ValueError``."""
    with pytest.raises(ValueError) as exc:
        # The instance itself is never used; only construction is under test.
        RapidLayout(iou_thres=1.2)
    assert exc.type is ValueError


def test_conf_outside_thres():
    """Constructing ``RapidLayout`` with ``conf_thres=1.2`` must raise ``ValueError``."""
    with pytest.raises(ValueError) as exc:
        # The instance itself is never used; only construction is under test.
        RapidLayout(conf_thres=1.2)
    assert exc.type is ValueError


def test_empty():
    """Calling the engine with ``None`` as image content must raise ``LoadImageError``."""
    # Build the engine outside the ``raises`` block so that only the call
    # under test can satisfy the expected exception.
    engine = RapidLayout()
    with pytest.raises(LoadImageError) as exc:
        engine(None)
    assert exc.type is LoadImageError

0 comments on commit a4a3440

Please sign in to comment.