diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 56ac7da..8118f76 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       matrix:
         os: ["ubuntu-latest"]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
       fail-fast: false
 
     env:
diff --git a/CHANGES.rst b/CHANGES.rst
index 5eee339..3d4d8d2 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -5,6 +5,7 @@ Skeem changelog
 
 in progress
 ===========
+- Added support for Python 3.12
 
 
 2023-03-09 0.1.0
diff --git a/skeem/frictionless/monkey.py b/skeem/frictionless/monkey.py
index 8596400..1ec746c 100644
--- a/skeem/frictionless/monkey.py
+++ b/skeem/frictionless/monkey.py
@@ -2,6 +2,7 @@
 from .loader_stream import read_byte_stream_create
 from .pandas_plugin import create_parser
 from .parser_jsonl import read_cell_stream_create
+from .parser_xlsx import read_loader
 from .resource import ResourcePlus
 
 
@@ -28,6 +29,7 @@ def patch_modules():
 
     - Don't croak when reading streams without `name` attribute.
     """
+    import frictionless.formats.excel.parsers
     import frictionless.formats.json.parsers
     import frictionless.formats.pandas.plugin
     import frictionless.schemes.aws.loaders.s3
@@ -37,3 +39,4 @@ def patch_modules():
     frictionless.formats.pandas.plugin.PandasPlugin.create_parser = create_parser
     frictionless.schemes.aws.loaders.s3.S3Loader.read_byte_stream_create = s3_read_byte_stream_create
     frictionless.schemes.stream.loader.StreamLoader.read_byte_stream_create = read_byte_stream_create
+    frictionless.formats.excel.parsers.XlsxParser.read_loader = read_loader
diff --git a/skeem/frictionless/parser_xlsx.py b/skeem/frictionless/parser_xlsx.py
new file mode 100644
index 0000000..b5d69b5
--- /dev/null
+++ b/skeem/frictionless/parser_xlsx.py
@@ -0,0 +1,64 @@
+import atexit
+import os
+import shutil
+import tempfile
+
+from frictionless.formats.excel.control import ExcelControl
+from frictionless.resource.resource import Resource
+from frictionless.system import system
+
+
+def read_loader(self):
+    """
+    Open a loader for the Excel workbook behind `self.resource`.
+
+    Patched for Python 3.12.
+
+    https://github.com/frictionlessdata/frictionless-py/issues/1642
+    https://github.com/frictionlessdata/frictionless-py/pull/1684
+
+    Local sources are opened directly. Remote sources are first copied into
+    a local temporary file. When `control.workbook_cache` is not None, that
+    file is kept, its name is stored in the cache under the resource's
+    `normpath`, and its removal is scheduled for interpreter exit; a later
+    read of the same path opens the cached copy instead of copying again.
+
+    :return: The opened loader.
+    """
+    control = ExcelControl.from_dialect(self.resource.dialect)
+    loader = system.create_loader(self.resource)
+    if not loader.remote:
+        return loader.open()
+
+    # Remote: work on a local copy from here on.
+    path = self.resource.normpath
+    cache = control.workbook_cache
+
+    # Cached: open the temporary copy registered by an earlier read, not the
+    # remote `path` itself, which is not a local file.
+    if cache is not None and path in cache:
+        # TODO: rebase on using resource without system?
+        resource = Resource(cache[path], scheme="file", format="xlsx")
+        resource.infer(sample=False)
+        return system.create_loader(resource).open()
+
+    with loader as loader:
+        # Without a cache, the copy is deleted on close by Python.
+        # https://docs.python.org/3/library/tempfile.html#tempfile.NamedTemporaryFile
+        target = tempfile.NamedTemporaryFile(delete=cache is None)
+        try:
+            shutil.copyfileobj(loader.byte_stream, target)
+            target.seek(0)
+        except Exception:
+            # Don't leak the handle, nor a persistent partial copy.
+            target.close()
+            if cache is not None:
+                os.remove(target.name)
+            raise
+        if cache is not None:
+            cache[path] = target.name
+            atexit.register(os.remove, target.name)
+        # TODO: rebase on using resource without system?
+        resource = Resource(target, scheme="stream", format="xlsx")
+        resource.infer(sample=False)
+        return system.create_loader(resource).open()