diff --git a/README.md b/README.md index c0e655e..1bbc40e 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,10 @@ |`pp_layout_table`| 表格 | `layout_table.onnx` |`["table"]` | | `pp_layout_publaynet`| 英文 | `layout_publaynet.onnx` |`["text", "title", "list", "table", "figure"]` | | `pp_layout_cdla`| 中文 | `layout_cdla.onnx` | `['text', 'title', 'figure', 'figure_caption', 'table', 'table_caption', 'header', 'footer', 'reference', 'equation']` | -| `yolov8n_layout_paper`| 论文 | `yolov8n_layout_paper.onnx` | `['text', 'title', 'figure', 'figure_caption', 'table', 'table_caption', 'header', 'footer', 'reference', 'equation']` | -| `yolov8n_layout_report`| 研报 | `yolov8n_layout_report.onnx` | `['text', 'title', 'header', 'footer', 'figure', 'figure_caption', 'table', 'table_caption', 'toc']` | +| `yolov8n_layout_paper`| 论文 | `yolov8n_layout_paper.onnx` | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` | +| `yolov8n_layout_report`| 研报 | `yolov8n_layout_report.onnx` | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` | +| `yolov8n_layout_publaynet`| 英文 | `yolov8n_layout_publaynet.onnx` | `["Text", "Title", "List", "Table", "Figure"]` | +| `yolov8n_layout_general6`| 通用 | `yolov8n_layout_general6.onnx` | `["Text", "Title", "Figure", "Table", "Caption", "Equation"]` | PP模型来源:[PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md) @@ -58,28 +60,28 @@ if ploted_img is not None: ``` #### 终端运行 -- 用法: - ```bash - $ rapid_layout -h - usage: rapid_layout [-h] -img IMG_PATH [-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}] - [--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}] - [--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}] [--use_cuda] [--use_dml] - [-v] - - options: - -h, --help show this help message and exit - -img IMG_PATH, --img_path IMG_PATH - Path to image for layout. - -m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}, --model_type {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report} - Support model type - --conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report} - Box threshold, the range is [0, 1] - --iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report} - IoU threshold, the range is [0, 1] - --use_cuda Whether to use cuda. - --use_dml Whether to use DirectML, which only works in Windows10+. - -v, --vis Wheter to visualize the layout results. - ``` +```bash +$ rapid_layout -h +usage: rapid_layout [-h] -img IMG_PATH + [-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}] + [--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}] + [--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}] + [--use_cuda] [--use_dml] [-v] + +options: + -h, --help show this help message and exit + -img IMG_PATH, --img_path IMG_PATH + Path to image for layout. + -m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}, --model_type {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6} + Support model type + --conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6} + Box threshold, the range is [0, 1] + --iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6} + IoU threshold, the range is [0, 1] + --use_cuda Whether to use cuda. + --use_dml Whether to use DirectML, which only works in Windows10+. + -v, --vis Wheter to visualize the layout results. +``` - 示例: ```bash $ rapid_layout -v -img test_images/layout.png diff --git a/rapid_layout/main.py b/rapid_layout/main.py index 4c618d3..c17f06a 100644 --- a/rapid_layout/main.py +++ b/rapid_layout/main.py @@ -31,6 +31,8 @@ "pp_layout_table": f"{ROOT_URL}/layout_table.onnx", "yolov8n_layout_paper": f"{ROOT_URL}/yolov8n_layout_paper.onnx", "yolov8n_layout_report": f"{ROOT_URL}/yolov8n_layout_report.onnx", + "yolov8n_layout_publaynet": f"{ROOT_URL}/yolov8n_layout_publaynet.onnx", + "yolov8n_layout_general6": f"{ROOT_URL}/yolov8n_layout_general6.onnx", } DEFAULT_MODEL_PATH = str(ROOT_DIR / "models" / "layout_cdla.onnx") @@ -72,12 +74,10 @@ def __init__( self.load_img = LoadImage() - self.pp_layout_type = [ - "pp_layout_cdla", - "pp_layout_publaynet", - "pp_layout_table", + self.pp_layout_type = [k for k in KEY_TO_MODEL_URL if k.startswith("pp")] + self.yolov8_layout_type = [ + k for k in KEY_TO_MODEL_URL if k.startswith("yolov8n") ] - self.yolov8_layout_type = ["yolov8n_layout_paper", "yolov8n_layout_report"] def __call__( self, img_content: Union[str, np.ndarray, bytes, Path] @@ -104,12 +104,15 @@ def pp_layout(self, img: np.ndarray, ori_img_shape: Tuple[int, int]): return boxes, scores, class_names, elapse def yolov8_layout(self, img: np.ndarray, ori_img_shape: Tuple[int, int]): + s_time = time.time() + input_tensor = self.yolov8_preprocess(img) outputs = self.session(input_tensor) boxes, scores, class_names = self.yolov8_postprocess( outputs, ori_img_shape, self.yolov8_input_shape ) - return boxes, scores, class_names + elapse = time.time() - s_time + return boxes, scores, class_names, elapse @staticmethod def get_model_path(model_type: str, model_path: Union[str, Path, None]) -> str: diff --git a/rapid_layout/utils/load_image.py b/rapid_layout/utils/load_image.py index 04c49e0..d610e42 100644 --- a/rapid_layout/utils/load_image.py +++ b/rapid_layout/utils/load_image.py @@ -25,7 +25,8 @@ def __call__(self, img: InputType) -> np.ndarray: origin_img_type = type(img) img = self.load_img(img) - img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + if img.ndim == 3: + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) img = self.convert_img(img, origin_img_type) return img diff --git a/tests/test_files/PMC3576793_00004.jpg b/tests/test_files/PMC3576793_00004.jpg new file mode 100644 index 0000000..023ac06 Binary files /dev/null and b/tests/test_files/PMC3576793_00004.jpg differ diff --git a/tests/test_layout.py b/tests/test_layout.py index 78b639a..f7f507e 100644 --- a/tests/test_layout.py +++ b/tests/test_layout.py @@ -21,23 +21,14 @@ img = cv2.imread(str(img_path)) -def test_iou_outside_thres(): - with pytest.raises(ValueError) as exc: - engine = RapidLayout(iou_thres=1.2) - assert exc.type is ValueError - - -def test_conf_outside_thres(): - with pytest.raises(ValueError) as exc: - engine = RapidLayout(conf_thres=1.2) - assert exc.type is ValueError - - -def test_empty(): - with pytest.raises(LoadImageError) as exc: - engine = RapidLayout() - engine(None) - assert exc.type is LoadImageError +@pytest.mark.parametrize( + "model_type,gt", [("yolov8n_layout_publaynet", 12), ("yolov8n_layout_general6", 13)] +) +def test_yolov8n_layout(model_type, gt): + img_path = test_file_dir / "PMC3576793_00004.jpg" + engine = RapidLayout(model_type=model_type) + boxes, scores, class_names, *elapse = engine(img_path) + assert len(boxes) == gt @pytest.mark.parametrize( @@ -56,3 +47,22 @@ def test_yolov8_layout(img_content): engine = RapidLayout(model_type="yolov8n_layout_paper") boxes, scores, class_names, *elapse = engine(img_content) assert len(boxes) == 11 + + +def test_iou_outside_thres(): + with pytest.raises(ValueError) as exc: + engine = RapidLayout(iou_thres=1.2) + assert exc.type is ValueError + + +def test_conf_outside_thres(): + with pytest.raises(ValueError) as exc: + engine = RapidLayout(conf_thres=1.2) + assert exc.type is ValueError + + +def test_empty(): + with pytest.raises(LoadImageError) as exc: + engine = RapidLayout() + engine(None) + assert exc.type is LoadImageError