From 1698ae5c4f8b0227a126b8f50d60cc2c04f283da Mon Sep 17 00:00:00 2001 From: Carbo Kuo Date: Mon, 29 Jul 2024 23:27:20 -0400 Subject: [PATCH 1/5] Use Python 3. From debian patch: https://salsa.debian.org/debian/opencc/-/blob/master/debian/patches/0003-data-Explicitly-use-python3.patch?ref_type=heads --- data/scripts/find_target.py | 2 +- data/scripts/merge.py | 2 +- data/scripts/reverse.py | 2 +- data/scripts/sort.py | 2 +- data/scripts/sort_all.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/data/scripts/find_target.py b/data/scripts/find_target.py index 4244949c3..7b2b4771e 100755 --- a/data/scripts/find_target.py +++ b/data/scripts/find_target.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import sys diff --git a/data/scripts/merge.py b/data/scripts/merge.py index 680c90e86..01fcf511e 100755 --- a/data/scripts/merge.py +++ b/data/scripts/merge.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import codecs diff --git a/data/scripts/reverse.py b/data/scripts/reverse.py index 746f887d3..a87ae4674 100755 --- a/data/scripts/reverse.py +++ b/data/scripts/reverse.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import sys diff --git a/data/scripts/sort.py b/data/scripts/sort.py index f7d4dc569..6ad6e8cb9 100755 --- a/data/scripts/sort.py +++ b/data/scripts/sort.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import sys diff --git a/data/scripts/sort_all.py b/data/scripts/sort_all.py index bd57ae735..c742413ed 100755 --- a/data/scripts/sort_all.py +++ b/data/scripts/sort_all.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import glob import sys From b2c912bdcb4ae4bcc4d86f221801c8953f6f5bad Mon Sep 17 00:00:00 2001 From: Carbo Kuo Date: Tue, 30 Jul 2024 01:09:00 -0400 Subject: [PATCH 2/5] bazel python --- .github/workflows/bazel.yml | 2 +- MODULE.bazel | 14 ++++++++++++++ MODULE.bazel.lock | 31 ++++++++++++++++++++++++++++++ python/opencc/BUILD.bazel | 10 ++++++++++ python/opencc/__init__.py | 17 ++++++++++++---- python/tests/BUILD.bazel | 19 ++++++++++++++++++ python/tests/requirements_lock.txt | 5 +++++ python/tests/test_opencc.py | 7 +++++++ src/BUILD.bazel | 14 ++++++++++++++ 9 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 python/opencc/BUILD.bazel create mode 100644 python/tests/BUILD.bazel create mode 100644 python/tests/requirements_lock.txt diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index f9ee8580e..4c45a9d33 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -17,4 +17,4 @@ jobs: - uses: actions/checkout@v4 - uses: bazelbuild/setup-bazelisk@v3 - run: bazel build //:opencc - - run: bazel test --test_output=all //src/... //data/... //test/... + - run: bazel test --test_output=all //src/... //data/... //test/... //python/... diff --git a/MODULE.bazel b/MODULE.bazel index 743d74efe..683aff803 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -9,7 +9,21 @@ module( bazel_dep(name = "darts-clone", version = "0.32") bazel_dep(name = "googletest", version = "1.15.0", dev_dependency = True) bazel_dep(name = "marisa-trie", version = "0.2.6") +bazel_dep(name = "pybind11_bazel", version = "2.12.0") bazel_dep(name = "rapidjson", version = "1.1.0") bazel_dep(name = "rules_cc", version = "0.0.9") bazel_dep(name = "rules_python", version = "0.34.0") bazel_dep(name = "tclap", version = "1.2.5") + + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + python_version = "3.12", +) +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") +pip.parse( + hub_name = "pip", + python_version = "3.12", + requirements_lock = "//python/tests:requirements_lock.txt", +) +use_repo(pip, "pip") diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index cbb5f6a0f..e794d2d28 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -152,6 +152,37 @@ }, "recordedRepoMappingEntries": [] } + }, + "@@pybind11_bazel~//:internal_configure.bzl%internal_configure_extension": { + "general": { + "bzlTransitiveDigest": "+F47SE20NlARCHVGbd4r7kkjg4OA0eCJcOd5fqKq4fQ=", + "usagesDigest": "iH2lKTfsNEpn2MqtGpBNwJrxbb2C7DiYmh/XuKgDtr8=", + "recordedFileInputs": { + "@@pybind11_bazel~//MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34" + }, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "pybind11": { + "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", + "ruleClassName": "http_archive", + "attributes": { + "build_file": "@@pybind11_bazel~//:pybind11-BUILD.bazel", + "strip_prefix": "pybind11-2.12.0", + "urls": [ + "https://github.com/pybind/pybind11/archive/v2.12.0.zip" + ] + } + } + }, + "recordedRepoMappingEntries": [ + [ + "pybind11_bazel~", + "bazel_tools", + "bazel_tools" + ] + ] + } } } } diff --git a/python/opencc/BUILD.bazel b/python/opencc/BUILD.bazel new file mode 100644 index 000000000..2cfccfb4c --- /dev/null +++ b/python/opencc/BUILD.bazel @@ -0,0 +1,10 @@ +load("@rules_python//python:py_library.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "opencc", + srcs = ["__init__.py"], + imports = [".."], + deps = ["//src:py_opencc"], +) diff --git a/python/opencc/__init__.py b/python/opencc/__init__.py index f77f80cf9..3884bdc7f 100644 --- a/python/opencc/__init__.py +++ b/python/opencc/__init__.py @@ -3,13 +3,18 @@ import os import sys -from opencc.clib import opencc_clib +try: + import opencc_clib +except ImportError: + from opencc.clib import opencc_clib __all__ = ['OpenCC', 'CONFIGS', '__version__'] __version__ = opencc_clib.__version__ -_thisdir = os.path.dirname(os.path.abspath(__file__)) -_opencc_share_dir = os.path.join(_thisdir, 'clib', 'share', 'opencc') +_this_dir = os.path.dirname(os.path.abspath(__file__)) +_opencc_share_dir = os.path.join(_this_dir, 'clib', 'share', 'opencc') +_opencc_rootdir = os.path.abspath(os.path.join(_this_dir, '..', '..')) +_opencc_configdir = os.path.join(_opencc_rootdir, 'data', 'config') if sys.version_info.major == 2: text_type = unicode # noqa @@ -18,6 +23,8 @@ if os.path.isdir(_opencc_share_dir): CONFIGS = [f for f in os.listdir(_opencc_share_dir) if f.endswith('.json')] +elif os.path.isdir(_opencc_configdir): + CONFIGS = [f for f in os.listdir(_opencc_configdir) if f.endswith('.json')] else: CONFIGS = [] @@ -39,7 +46,9 @@ def __init__(self, config='t2s'): if not config.endswith('.json'): config += '.json' if not os.path.isfile(config): - config = os.path.join(_opencc_share_dir, config) + config_under_share_dir = os.path.join(_opencc_share_dir, config) + if os.path.isfile(config_under_share_dir): + config = config_under_share_dir super(OpenCC, self).__init__(config) self.config = config diff --git a/python/tests/BUILD.bazel b/python/tests/BUILD.bazel new file mode 100644 index 000000000..cd55567d1 --- /dev/null +++ b/python/tests/BUILD.bazel @@ -0,0 +1,19 @@ +load("@pip//:requirements.bzl", "requirement") +load("@rules_python//python:py_test.bzl", "py_test") + +py_test( + name = "test_opencc", + srcs = ["test_opencc.py"], + data = [ + "//data/config", + "//data/dictionary:binary_dictionaries", + "//data/dictionary:text_dictionaries", + "//test/testcases", + ], + imports = [".."], + deps = [ + "//python/opencc", + requirement("pytest"), + requirement("exceptiongroup"), + ], +) diff --git a/python/tests/requirements_lock.txt b/python/tests/requirements_lock.txt new file mode 100644 index 000000000..330e2743e --- /dev/null +++ b/python/tests/requirements_lock.txt @@ -0,0 +1,5 @@ +exceptiongroup==1.2.2 +iniconfig==2.0.0 +packaging==24.1 +pluggy==1.5.0 +pytest==8.3.2 diff --git a/python/tests/test_opencc.py b/python/tests/test_opencc.py index 8ca83b7ef..31b2b0453 100644 --- a/python/tests/test_opencc.py +++ b/python/tests/test_opencc.py @@ -1,6 +1,9 @@ from __future__ import unicode_literals import os +import pytest +import sys + from glob import glob _this_dir = os.path.dirname(os.path.abspath(__file__)) @@ -39,3 +42,7 @@ def test_conversion(): for text, ans in zip(intexts, anstexts): assert converter.convert(text) == ans, \ 'Failed to convert {} for {} -> {}'.format(pref, text, ans) + + +if __name__ == "__main__": + sys.exit(pytest.main(sys.argv[1:])) diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 550d3b864..e1e5f24db 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -1,4 +1,6 @@ +load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") load("@rules_cc//cc:defs.bzl", "cc_library") +load("@rules_python//python:py_library.bzl", "py_library") package(default_visibility = ["//visibility:public"]) @@ -328,6 +330,18 @@ cc_library( ], ) +pybind_extension( + name = "opencc_clib", + srcs = ["py_opencc.cpp"], + deps = [":opencc"], +) + +py_library( + name = "py_opencc", + data = [":opencc_clib"], + imports = ["."], +) + cc_library( name = "segmentation", srcs = ["Segmentation.cpp"], From 4f2fec38bd70c8273ccf1aafeaa5a7e8a04dd164 Mon Sep 17 00:00:00 2001 From: Carbo Kuo Date: Tue, 30 Jul 2024 01:16:10 -0400 Subject: [PATCH 3/5] Set up Python version --- .github/workflows/bazel.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index 4c45a9d33..537167599 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -16,5 +16,9 @@ jobs: steps: - uses: actions/checkout@v4 - uses: bazelbuild/setup-bazelisk@v3 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" - run: bazel build //:opencc - run: bazel test --test_output=all //src/... //data/... //test/... //python/... From cc851f336e49320d6407d7f04eb22f9a5f856f40 Mon Sep 17 00:00:00 2001 From: Carbo Kuo Date: Tue, 30 Jul 2024 01:21:36 -0400 Subject: [PATCH 4/5] :py_opencc --- BUILD.bazel | 10 ++++++++++ python/opencc/BUILD.bazel | 5 +++++ python/tests/BUILD.bazel | 3 --- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 6c61b49ba..263edc9b9 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,3 +1,6 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") +load("@rules_python//python:py_library.bzl", "py_library") + package(default_visibility = ["//visibility:public"]) cc_library( @@ -17,3 +20,10 @@ cc_library( "//src:opencc", ], ) + +py_library( + name = "py_opencc", + deps = [ + "//python/opencc", + ], +) diff --git a/python/opencc/BUILD.bazel b/python/opencc/BUILD.bazel index 2cfccfb4c..876b1ca34 100644 --- a/python/opencc/BUILD.bazel +++ b/python/opencc/BUILD.bazel @@ -5,6 +5,11 @@ package(default_visibility = ["//visibility:public"]) py_library( name = "opencc", srcs = ["__init__.py"], + data = [ + "//data/config", + "//data/dictionary:binary_dictionaries", + "//data/dictionary:text_dictionaries", + ], imports = [".."], deps = ["//src:py_opencc"], ) diff --git a/python/tests/BUILD.bazel b/python/tests/BUILD.bazel index cd55567d1..a8ca16674 100644 --- a/python/tests/BUILD.bazel +++ b/python/tests/BUILD.bazel @@ -5,9 +5,6 @@ py_test( name = "test_opencc", srcs = ["test_opencc.py"], data = [ - "//data/config", - "//data/dictionary:binary_dictionaries", - "//data/dictionary:text_dictionaries", "//test/testcases", ], imports = [".."], From 2c8e1e441b1591488fc1933ce8cd4f01af94b4f3 Mon Sep 17 00:00:00 2001 From: Carbo Kuo Date: Tue, 30 Jul 2024 01:28:29 -0400 Subject: [PATCH 5/5] fix deps of pybind11 --- MODULE.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/MODULE.bazel b/MODULE.bazel index 683aff803..b59448e9d 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -9,6 +9,7 @@ module( bazel_dep(name = "darts-clone", version = "0.32") bazel_dep(name = "googletest", version = "1.15.0", dev_dependency = True) bazel_dep(name = "marisa-trie", version = "0.2.6") +bazel_dep(name = "platforms", version = "0.0.10") bazel_dep(name = "pybind11_bazel", version = "2.12.0") bazel_dep(name = "rapidjson", version = "1.1.0") bazel_dep(name = "rules_cc", version = "0.0.9")