From 1f32a7cd861c14b737a36f6af6f0019fcb750912 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 30 Dec 2023 19:01:01 +1300 Subject: [PATCH 01/25] Check passing pyarrow.array with string type to pygmt.text Ensure that pyarrow.array objects with string type can be read by pygmt.text. --- pygmt/tests/test_text.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pygmt/tests/test_text.py b/pygmt/tests/test_text.py index 509dcd75cb1..bc28e180ef8 100644 --- a/pygmt/tests/test_text.py +++ b/pygmt/tests/test_text.py @@ -8,6 +8,12 @@ from pygmt import Figure from pygmt.exceptions import GMTCLibError, GMTInvalidInput from pygmt.helpers import GMTTempFile +from pygmt.helpers.testing import skip_if_no + +try: + import pyarrow as pa +except ImportError: + pa = None TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") POINTS_DATA = os.path.join(TEST_DATA_DIR, "points.txt") @@ -47,8 +53,20 @@ def test_text_single_line_of_text(region, projection): @pytest.mark.benchmark -@pytest.mark.mpl_image_compare -def test_text_multiple_lines_of_text(region, projection): +@pytest.mark.mpl_image_compare(filename="test_text_multiple_lines_of_text.png") +@pytest.mark.parametrize( + "array_func", + [ + list, + pytest.param(np.array, id="numpy"), + pytest.param( + getattr(pa, "array", None), + marks=skip_if_no(package="pyarrow"), + id="pyarrow", + ), + ], +) +def test_text_multiple_lines_of_text(region, projection, array_func): """ Place multiple lines of text at their respective x, y locations. """ @@ -58,7 +76,7 @@ def test_text_multiple_lines_of_text(region, projection): projection=projection, x=[1.2, 1.6], y=[0.6, 0.3], - text=["This is a line of text", "This is another line of text"], + text=array_func(["This is a line of text", "This is another line of text"]), ) return fig From 4c4e064c4b4d7fd3edbfda2bb75414824063238c Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 30 Dec 2023 19:49:53 +1300 Subject: [PATCH 02/25] Check passing pyarrow.array with string type to virtualfile_from_vectors --- pygmt/tests/test_clib_virtualfiles.py | 28 ++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index 92a0f98b85b..a98df1fbb09 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -12,8 +12,14 @@ from pygmt import clib from pygmt.exceptions import GMTCLibError, GMTInvalidInput from pygmt.helpers import GMTTempFile +from pygmt.helpers.testing import skip_if_no from pygmt.tests.test_clib import mock +try: + import pyarrow as pa +except ImportError: + pa = None + TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") POINTS_DATA = os.path.join(TEST_DATA_DIR, "points.txt") @@ -210,16 +216,28 @@ def test_virtualfile_from_vectors(dtypes): assert output == expected -@pytest.mark.parametrize("dtype", [str, object]) -def test_virtualfile_from_vectors_one_string_or_object_column(dtype): +@pytest.mark.parametrize( + ("array_func", "dtype"), + [ + pytest.param(np.array, {"dtype": str}, id="str"), + pytest.param(np.array, {"dtype": object}, id="object"), + pytest.param( + getattr(pa, "array", None), + {}, # pa.string() + marks=skip_if_no(package="pyarrow"), + id="pyarrow", + ), + ], +) +def test_virtualfile_from_vectors_one_string_or_object_column(array_func, dtype): """ - Test passing in one column with string or object dtype into virtual file - dataset. + Test passing in one column with string (numpy/pyarrow) or object (numpy) + dtype into virtual file dataset. """ size = 5 x = np.arange(size, dtype=np.int32) y = np.arange(size, size * 2, 1, dtype=np.int32) - strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype) + strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) with clib.Session() as lib: with lib.virtualfile_from_vectors(x, y, strings) as vfile: with GMTTempFile() as outfile: From d379e4667c5f8fcc907a3a56543ae734b8c605eb Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:27:06 +1300 Subject: [PATCH 03/25] Enable passing pyarrow.StringArray to clib.Session.put_strings Convert a pyarrow.StringArray via a Python list to a ctypes array in the strings_to_ctypes_array function. Updated docstrings and type hints in `clib.Session.put_strings` method and `clib.conversion.strings_to_ctypes_array` function. Added two parametrized unit tests to ensure that pyarrow.StringArray can be passed into the clib methods. --- pygmt/clib/conversion.py | 18 ++++++++--- pygmt/clib/session.py | 43 ++++++++++++++++----------- pygmt/tests/test_clib_put_strings.py | 25 ++++++++++++++-- pygmt/tests/test_clib_virtualfiles.py | 2 +- 4 files changed, 63 insertions(+), 25 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 0739d767567..e924fad5332 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -9,6 +9,10 @@ import numpy as np from pygmt.exceptions import GMTInvalidInput +try: + import pyarrow as pa +except ImportError: + pa = None def dataarray_to_matrix(grid): """ @@ -263,14 +267,15 @@ def sequence_to_ctypes_array( return (ctype * size)(*sequence) -def strings_to_ctypes_array(strings: Sequence[str]) -> ctp.Array: +def strings_to_ctypes_array(strings: Sequence[str] | pa.StringArray) -> ctp.Array: """ - Convert a sequence (e.g., a list) of strings into a ctypes array. + Convert a sequence (e.g., a list) of strings or a pyarrow.StringArray into a ctypes + array. Parameters ---------- strings - A sequence of strings. + A sequence of strings or a pyarrow.StringArray. Returns ------- @@ -286,7 +291,12 @@ def strings_to_ctypes_array(strings: Sequence[str]) -> ctp.Array: >>> [s.decode() for s in ctypes_array] ['first', 'second', 'third'] """ - return (ctp.c_char_p * len(strings))(*[s.encode() for s in strings]) + try: + bytes_string_list = [s.encode() for s in strings] + except AttributeError: # 'pyarrow.StringScalar' object has no attribute 'encode' + # Convert pyarrow.StringArray to Python list first + bytes_string_list = [s.encode() for s in strings.to_pylist()] + return (ctp.c_char_p * len(strings))(*bytes_string_list) def array_to_datetime(array): diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index a96ca84baae..959baac1540 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -34,6 +34,11 @@ tempfile_from_image, ) +try: + import pyarrow as pa +except ImportError: + pa = None + FAMILIES = [ "GMT_IS_DATASET", # Entity is a data table "GMT_IS_GRID", # Entity is a grid @@ -936,39 +941,43 @@ def put_vector(self, dataset, column, vector): f"in column {column} of dataset." ) - def put_strings(self, dataset, family, strings): + def put_strings( + self, + dataset: ctp.c_void_p, + family: Literal["GMT_IS_VECTOR", "GMT_IS_MATRIX"], + strings: Sequence[str] | pa.StringArray, + ): """ - Attach a numpy 1-D array of dtype str as a column on a GMT dataset. + Attach a 1-D numpy array of dtype str or pyarrow.StringArray as a column on a + GMT dataset. - Use this function to attach string type numpy array data to a GMT - dataset and pass it to GMT modules. Wraps ``GMT_Put_Strings``. + Use this function to attach string type array data to a GMT dataset and pass it + to GMT modules. Wraps ``GMT_Put_Strings``. - The dataset must be created by :meth:`pygmt.clib.Session.create_data` - first. + The dataset must be created by :meth:`pygmt.clib.Session.create_data` first. .. warning:: - The numpy array must be C contiguous in memory. If it comes from a - column slice of a 2-D array, for example, you will have to make a - copy. Use :func:`numpy.ascontiguousarray` to make sure your vector - is contiguous (it won't copy if it already is). + The array must be C contiguous in memory. If it comes from a column slice of + a 2-D array, for example, you will have to make a copy. Use + :func:`numpy.ascontiguousarray` to make sure your vector is contiguous (it + won't copy if it already is). Parameters ---------- - dataset : :class:`ctypes.c_void_p` + dataset The ctypes void pointer to a ``GMT_Dataset``. Create it with :meth:`pygmt.clib.Session.create_data`. - family : str + family The family type of the dataset. Can be either ``GMT_IS_VECTOR`` or ``GMT_IS_MATRIX``. - strings : numpy 1-D array - The array that will be attached to the dataset. Must be a 1-D C - contiguous array. + strings + The array that will be attached to the dataset. Must be a 1-D C contiguous + array. Raises ------ GMTCLibError - If given invalid input or ``GMT_Put_Strings`` exits with - status != 0. + If given invalid input or ``GMT_Put_Strings`` exits with status != 0. """ c_put_strings = self.get_libgmt_func( "GMT_Put_Strings", diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index 92287be3e29..0db47ca4a8a 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -8,12 +8,31 @@ from pygmt import clib from pygmt.exceptions import GMTCLibError from pygmt.helpers import GMTTempFile +from pygmt.helpers.testing import skip_if_no + +try: + import pyarrow as pa +except ImportError: + pa = None @pytest.mark.benchmark -def test_put_strings(): +@pytest.mark.parametrize( + ("array_func", "dtype"), + [ + pytest.param(np.array, {"dtype": str}, id="str"), + pytest.param( + getattr(pa, "array", None), + {"type": pa.string()}, + marks=skip_if_no(package="pyarrow"), + id="pyarrow", + ), + ], +) +def test_put_strings(array_func, dtype): """ - Check that assigning a numpy array of dtype str to a dataset works. + Check that assigning a numpy array of dtype str, or a pyarrow.StringArray to a + dataset works. """ with clib.Session() as lib: dataset = lib.create_data( @@ -24,7 +43,7 @@ def test_put_strings(): ) x = np.array([1, 2, 3, 4, 5], dtype=np.int32) y = np.array([6, 7, 8, 9, 10], dtype=np.int32) - strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=str) + strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) lib.put_vector(dataset, column=lib["GMT_X"], vector=x) lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) lib.put_strings( diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index 0f70c06cb85..2d9b66b47d6 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -238,7 +238,7 @@ def test_virtualfile_from_vectors(dtypes): pytest.param(np.array, {"dtype": object}, id="object"), pytest.param( getattr(pa, "array", None), - {}, # pa.string() + {"type": pa.string()}, marks=skip_if_no(package="pyarrow"), id="pyarrow", ), From cfda386da577d0e2d2ab31a133f0e70770722393 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:16:59 +1300 Subject: [PATCH 04/25] Use "string" instead of pyarrow.string() in case pyarrow not installed Fixes `AttributeError: 'NoneType' object has no attribute 'string'` --- pygmt/tests/test_clib_put_strings.py | 2 +- pygmt/tests/test_clib_virtualfiles.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index 0db47ca4a8a..41f0dc921d4 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -23,7 +23,7 @@ pytest.param(np.array, {"dtype": str}, id="str"), pytest.param( getattr(pa, "array", None), - {"type": pa.string()}, + {"type": "string"}, # pa.string() marks=skip_if_no(package="pyarrow"), id="pyarrow", ), diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index 2d9b66b47d6..f0160d8bb82 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -238,7 +238,7 @@ def test_virtualfile_from_vectors(dtypes): pytest.param(np.array, {"dtype": object}, id="object"), pytest.param( getattr(pa, "array", None), - {"type": pa.string()}, + {"type": "string"}, # pa.string() marks=skip_if_no(package="pyarrow"), id="pyarrow", ), From 0a6cda5285f68b607e0fc49436dc76c84f00e598 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:17:42 +1300 Subject: [PATCH 05/25] Try to fix type hints --- pygmt/clib/conversion.py | 7 ++++++- pygmt/clib/session.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index e924fad5332..092ccab11bb 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -9,11 +9,16 @@ import numpy as np from pygmt.exceptions import GMTInvalidInput +StringArrayTypes = Sequence[str] + try: import pyarrow as pa + + StringArrayTypes |= pa.StringArray except ImportError: pa = None + def dataarray_to_matrix(grid): """ Transform an xarray.DataArray into a data 2-D array and metadata. @@ -267,7 +272,7 @@ def sequence_to_ctypes_array( return (ctype * size)(*sequence) -def strings_to_ctypes_array(strings: Sequence[str] | pa.StringArray) -> ctp.Array: +def strings_to_ctypes_array(strings: StringArrayTypes) -> ctp.Array: """ Convert a sequence (e.g., a list) of strings or a pyarrow.StringArray into a ctypes array. diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 959baac1540..fe55d241536 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -34,11 +34,16 @@ tempfile_from_image, ) +StringArrayTypes = Sequence[str] + try: import pyarrow as pa + + StringArrayTypes |= pa.StringArray except ImportError: pa = None + FAMILIES = [ "GMT_IS_DATASET", # Entity is a data table "GMT_IS_GRID", # Entity is a grid @@ -945,7 +950,7 @@ def put_strings( self, dataset: ctp.c_void_p, family: Literal["GMT_IS_VECTOR", "GMT_IS_MATRIX"], - strings: Sequence[str] | pa.StringArray, + strings: StringArrayTypes, ): """ Attach a 1-D numpy array of dtype str or pyarrow.StringArray as a column on a From f59f93cf86d9f48ba4a8d7c310f73f5716391539 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:32:10 +1300 Subject: [PATCH 06/25] Add np.ndarray to StringArrayTypes and fix/ignore remaining type errors --- pygmt/clib/conversion.py | 4 ++-- pygmt/clib/session.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 092ccab11bb..c9eb070d1d7 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -9,7 +9,7 @@ import numpy as np from pygmt.exceptions import GMTInvalidInput -StringArrayTypes = Sequence[str] +StringArrayTypes = Sequence[str] | np.ndarray try: import pyarrow as pa @@ -300,7 +300,7 @@ def strings_to_ctypes_array(strings: StringArrayTypes) -> ctp.Array: bytes_string_list = [s.encode() for s in strings] except AttributeError: # 'pyarrow.StringScalar' object has no attribute 'encode' # Convert pyarrow.StringArray to Python list first - bytes_string_list = [s.encode() for s in strings.to_pylist()] + bytes_string_list = [s.encode() for s in strings.to_pylist()] # type: ignore[union-attr] return (ctp.c_char_p * len(strings))(*bytes_string_list) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index fe55d241536..611b164715c 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -34,7 +34,7 @@ tempfile_from_image, ) -StringArrayTypes = Sequence[str] +StringArrayTypes = Sequence[str] | np.ndarray try: import pyarrow as pa @@ -1005,9 +1005,8 @@ def put_strings( self.session_pointer, family_int, dataset, strings_pointer ) if status != 0: - raise GMTCLibError( - f"Failed to put strings of type {strings.dtype} into dataset" - ) + dtype = strings.dtype if hasattr(strings, "dtype") else type(strings) + raise GMTCLibError(f"Failed to put strings of type {dtype} into dataset") def put_matrix(self, dataset, matrix, pad=0): """ From 17c1e9ca28acbb9f35223b0bb770a8dc8bb75843 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 19:20:19 +1300 Subject: [PATCH 07/25] Move StringArrayTypes to pygmt/_typing.py Also improve docstring of strings_to_ctypes_array function to mention np.ndarray as supported input. --- pygmt/_typing.py | 10 ++++++++++ pygmt/clib/conversion.py | 16 ++++------------ pygmt/clib/session.py | 11 +---------- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/pygmt/_typing.py b/pygmt/_typing.py index bbc7d596c65..4a57c3c7678 100644 --- a/pygmt/_typing.py +++ b/pygmt/_typing.py @@ -2,7 +2,17 @@ Type aliases for type hints. """ +import contextlib +import importlib +from collections.abc import Sequence from typing import Literal +import numpy as np + # Anchor codes AnchorCode = Literal["TL", "TC", "TR", "ML", "MC", "MR", "BL", "BC", "BR"] + +# String array types +StringArrayTypes = Sequence[str] | np.ndarray +with contextlib.suppress(ImportError): + StringArrayTypes |= importlib.import_module(name="pyarrow").StringArray diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index f4c8bb50fd2..5a3f708af1a 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -9,17 +9,9 @@ import numpy as np import pandas as pd from packaging.version import Version +from pygmt._typing import StringArrayTypes from pygmt.exceptions import GMTInvalidInput -StringArrayTypes = Sequence[str] | np.ndarray - -try: - import pyarrow as pa - - StringArrayTypes |= pa.StringArray -except ImportError: - pa = None - def dataarray_to_matrix(grid): """ @@ -292,13 +284,13 @@ def sequence_to_ctypes_array( def strings_to_ctypes_array(strings: StringArrayTypes) -> ctp.Array: """ - Convert a sequence (e.g., a list) of strings or a pyarrow.StringArray into a ctypes - array. + Convert a sequence (e.g., a list) or numpy.ndarray of strings or a + pyarrow.StringArray into a ctypes array. Parameters ---------- strings - A sequence of strings or a pyarrow.StringArray. + A sequence of strings, a numpy.ndarray of str dtype, or a pyarrow.StringArray. Returns ------- diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index af5678aef09..f53a5af2cf5 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -17,6 +17,7 @@ import numpy as np import pandas as pd import xarray as xr +from pygmt._typing import StringArrayTypes from pygmt.clib.conversion import ( array_to_datetime, dataarray_to_matrix, @@ -34,16 +35,6 @@ tempfile_from_image, ) -StringArrayTypes = Sequence[str] | np.ndarray - -try: - import pyarrow as pa - - StringArrayTypes |= pa.StringArray -except ImportError: - pa = None - - FAMILIES = [ "GMT_IS_DATASET", # Entity is a data table "GMT_IS_GRID", # Entity is a grid From 0105d64740a4f45d9a6337d28c4ebdda1dd7a795 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 19:29:13 +1300 Subject: [PATCH 08/25] Add pyarrow to docs CI So that the pyarrow.StringArray type hint will show up on Readthedocs and the PyGMT docs page. --- .github/workflows/ci_docs.yml | 1 + ci/requirements/docs.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci_docs.yml b/.github/workflows/ci_docs.yml index 57cf9c8c75c..c0c8c8235e8 100644 --- a/.github/workflows/ci_docs.yml +++ b/.github/workflows/ci_docs.yml @@ -103,6 +103,7 @@ jobs: contextily geopandas<1.0 ipython + pyarrow rioxarray make pip diff --git a/ci/requirements/docs.yml b/ci/requirements/docs.yml index 8214dbac8fe..6ae1595fe28 100644 --- a/ci/requirements/docs.yml +++ b/ci/requirements/docs.yml @@ -16,6 +16,7 @@ dependencies: - contextily - geopandas<1.0 - ipython + - pyarrow - rioxarray # Development dependencies (general) - make From 3ad0c86c20cd1079e2de99a417fcce59fb7a5370 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 20:40:04 +1300 Subject: [PATCH 09/25] Use np.asarray to convert pa.StringArray instead of .to_pylist() Faster for longer string arrays. Co-authored-by: Dongdong Tian --- pygmt/clib/conversion.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 5a3f708af1a..5cd50cc0734 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -306,12 +306,7 @@ def strings_to_ctypes_array(strings: StringArrayTypes) -> ctp.Array: >>> [s.decode() for s in ctypes_array] ['first', 'second', 'third'] """ - try: - bytes_string_list = [s.encode() for s in strings] - except AttributeError: # 'pyarrow.StringScalar' object has no attribute 'encode' - # Convert pyarrow.StringArray to Python list first - bytes_string_list = [s.encode() for s in strings.to_pylist()] # type: ignore[union-attr] - return (ctp.c_char_p * len(strings))(*bytes_string_list) + return (ctp.c_char_p * len(strings))(*[s.encode() for s in np.asarray(strings)]) def array_to_datetime(array): From 4bea288e34aee5cab520fa267e6ef9e9828cfcbe Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 20:48:21 +1300 Subject: [PATCH 10/25] Update note to say that PyArrow string types are now supported Still need to work on Duration and GeoArrow geometry types. --- doc/install.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/install.md b/doc/install.md index c64369c5b5a..c627cae853a 100644 --- a/doc/install.md +++ b/doc/install.md @@ -107,9 +107,10 @@ The following are optional dependencies: :::{note} If you have [PyArrow](https://arrow.apache.org/docs/python/index.html) installed, PyGMT does have some initial support for `pandas.Series` and `pandas.DataFrame` objects with -Apache Arrow-backed arrays. Specifically, only uint/int/float and date32/date64 dtypes -are supported for now. Support for string Arrow dtypes is still a work in progress. -For more details, see [issue #2800](https://github.com/GenericMappingTools/pygmt/issues/2800). +Apache Arrow-backed arrays. Specifically, only uint/int/float, date32/date64 and string +types are supported for now. Support for Duration types and GeoArrow geometry types is +still a work in progress. For more details, see +[issue #2800](https://github.com/GenericMappingTools/pygmt/issues/2800). ::: ## Installing GMT and other dependencies From 371174a61e811a3c13200e953eb0d75ee5a35ef6 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 20:53:06 +1300 Subject: [PATCH 11/25] Add back pytest.mark.benchmark marker Accidentally removed from `test_virtualfile_from_vectors_one_string_or_object_column` during merge conflict handling. --- pygmt/tests/test_clib_virtualfiles.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index f0160d8bb82..a7f449cbf32 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -231,6 +231,7 @@ def test_virtualfile_from_vectors(dtypes): assert output == expected +@pytest.mark.benchmark @pytest.mark.parametrize( ("array_func", "dtype"), [ From b5887304bc955e4f60708e61456c29a4c0cad5d4 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 11 Oct 2024 20:57:21 +1300 Subject: [PATCH 12/25] Add intersphinx link for pyarrow --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index fc26641bd83..2024646c0d5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -85,6 +85,7 @@ "contextily": ("https://contextily.readthedocs.io/en/stable/", None), "geopandas": ("https://geopandas.org/en/stable/", None), "numpy": ("https://numpy.org/doc/stable/", None), + "pyarrow": ("https://arrow.apache.org/docs/", None), "python": ("https://docs.python.org/3/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "rasterio": ("https://rasterio.readthedocs.io/en/stable/", None), From faf2065c47b15eafbb79218346c0a8176a79b80c Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Wed, 6 Nov 2024 14:38:07 +1300 Subject: [PATCH 13/25] Apply suggestions from code review Co-authored-by: Dongdong Tian --- pygmt/clib/session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index f53a5af2cf5..5a0eb003a89 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -934,7 +934,7 @@ def put_vector(self, dataset, column, vector): def put_strings( self, dataset: ctp.c_void_p, - family: Literal["GMT_IS_VECTOR", "GMT_IS_MATRIX"], + family: str, strings: StringArrayTypes, ): """ @@ -955,8 +955,8 @@ def put_strings( Parameters ---------- dataset - The ctypes void pointer to a ``GMT_Dataset``. Create it with - :meth:`pygmt.clib.Session.create_data`. + The ctypes void pointer to a ``GMT_VECTOR``/``GMT_MATRIX`` container. + Create it with :meth:`pygmt.clib.Session.create_data`. family The family type of the dataset. Can be either ``GMT_IS_VECTOR`` or ``GMT_IS_MATRIX``. From 9fd77dce62b10089670979b2a669faefd430b4cd Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Wed, 6 Nov 2024 15:01:26 +1300 Subject: [PATCH 14/25] format --- pygmt/clib/session.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index b795ec04efc..1248a09c6cb 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1001,10 +1001,7 @@ def put_vector(self, dataset: ctp.c_void_p, column: int, vector: np.ndarray): raise GMTCLibError(msg) def put_strings( - self, - dataset: ctp.c_void_p, - family: str, - strings: StringArrayTypes, + self, dataset: ctp.c_void_p, family: str, strings: StringArrayTypes ): """ Attach a 1-D numpy array of dtype str or pyarrow.StringArray as a column on a From a927202acfcdb9a236a765534079bb821b798189 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:20:24 +1300 Subject: [PATCH 15/25] Revert "Enable passing pyarrow.StringArray to clib.Session.put_strings" This reverts commit d379e4667c5f8fcc907a3a56543ae734b8c605eb. --- pygmt/clib/conversion.py | 11 ++++---- pygmt/clib/session.py | 40 +++++++++++++-------------- pygmt/tests/test_clib_put_strings.py | 25 ++--------------- pygmt/tests/test_clib_virtualfiles.py | 2 +- 4 files changed, 28 insertions(+), 50 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 87ebf17618b..68cddd63549 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -11,7 +11,6 @@ import pandas as pd import xarray as xr from packaging.version import Version -from pygmt._typing import StringArrayTypes from pygmt.exceptions import GMTInvalidInput @@ -279,15 +278,15 @@ def sequence_to_ctypes_array( return (ctype * size)(*sequence) -def strings_to_ctypes_array(strings: StringArrayTypes) -> ctp.Array: +def strings_to_ctypes_array(strings: Sequence[str] | np.ndarray) -> ctp.Array: """ - Convert a sequence (e.g., a list) or numpy.ndarray of strings or a - pyarrow.StringArray into a ctypes array. + Convert a sequence (e.g., a list) of strings or numpy.ndarray of strings into a + ctypes array. Parameters ---------- strings - A sequence of strings, a numpy.ndarray of str dtype, or a pyarrow.StringArray. + A sequence of strings, or a numpy.ndarray of str dtype. Returns ------- @@ -303,7 +302,7 @@ def strings_to_ctypes_array(strings: StringArrayTypes) -> ctp.Array: >>> [s.decode() for s in ctypes_array] ['first', 'second', 'third'] """ - return (ctp.c_char_p * len(strings))(*[s.encode() for s in np.asarray(strings)]) + return (ctp.c_char_p * len(strings))(*[s.encode() for s in strings]) def array_to_datetime(array: Sequence[Any] | np.ndarray) -> np.ndarray: diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 1248a09c6cb..7229ea170d8 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -16,7 +16,6 @@ import numpy as np import pandas as pd import xarray as xr -from pygmt._typing import StringArrayTypes from pygmt.clib.conversion import ( array_to_datetime, dataarray_to_matrix, @@ -1000,40 +999,39 @@ def put_vector(self, dataset: ctp.c_void_p, column: int, vector: np.ndarray): ) raise GMTCLibError(msg) - def put_strings( - self, dataset: ctp.c_void_p, family: str, strings: StringArrayTypes - ): + def put_strings(self, dataset: ctp.c_void_p, family: str, strings: np.ndarray): """ - Attach a 1-D numpy array of dtype str or pyarrow.StringArray as a column on a - GMT dataset. + Attach a numpy 1-D array of dtype str as a column on a GMT dataset. - Use this function to attach string type numpy array data to a GMT dataset and - pass it to GMT modules. Wraps ``GMT_Put_Strings``. + Use this function to attach string type numpy array data to a GMT + dataset and pass it to GMT modules. Wraps ``GMT_Put_Strings``. - The dataset must be created by :meth:`pygmt.clib.Session.create_data` first. + The dataset must be created by :meth:`pygmt.clib.Session.create_data` + first. .. warning:: - The numpy array must be C contiguous in memory. If it comes from a column - slice of a 2-D array, for example, you will have to make a copy. Use - :func:`numpy.ascontiguousarray` to make sure your vector is contiguous (it - won't copy if it already is). + The numpy array must be C contiguous in memory. If it comes from a + column slice of a 2-D array, for example, you will have to make a + copy. Use :func:`numpy.ascontiguousarray` to make sure your vector + is contiguous (it won't copy if it already is). Parameters ---------- - dataset - The ctypes void pointer to a ``GMT_VECTOR``/``GMT_MATRIX`` data container. - Create it with :meth:`pygmt.clib.Session.create_data`. - family + dataset : :class:`ctypes.c_void_p` + The ctypes void pointer to a ``GMT_Dataset``. Create it with + :meth:`pygmt.clib.Session.create_data`. + family : str The family type of the dataset. Can be either ``GMT_IS_VECTOR`` or ``GMT_IS_MATRIX``. - strings - The array that will be attached to the dataset. Must be a 1-D C contiguous - array. + strings : numpy 1-D array + The array that will be attached to the dataset. Must be a 1-D C + contiguous array. Raises ------ GMTCLibError - If given invalid input or ``GMT_Put_Strings`` exits with a non-zero status. + If given invalid input or ``GMT_Put_Strings`` exits with + status != 0. """ c_put_strings = self.get_libgmt_func( "GMT_Put_Strings", diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index ccc0189c780..bd2687bb7c0 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -8,31 +8,12 @@ from pygmt import clib from pygmt.exceptions import GMTCLibError from pygmt.helpers import GMTTempFile -from pygmt.helpers.testing import skip_if_no - -try: - import pyarrow as pa -except ImportError: - pa = None @pytest.mark.benchmark -@pytest.mark.parametrize( - ("array_func", "dtype"), - [ - pytest.param(np.array, {"dtype": np.str_}, id="str"), - pytest.param( - getattr(pa, "array", None), - {"type": "string"}, # pa.string() - marks=skip_if_no(package="pyarrow"), - id="pyarrow", - ), - ], -) -def test_put_strings(array_func, dtype): +def test_put_strings(): """ - Check that assigning a numpy array of dtype str, or a pyarrow.StringArray to a - dataset works. + Check that assigning a numpy array of dtype str to a dataset works. """ with clib.Session() as lib: dataset = lib.create_data( @@ -43,7 +24,7 @@ def test_put_strings(array_func, dtype): ) x = np.array([1, 2, 3, 4, 5], dtype=np.int32) y = np.array([6, 7, 8, 9, 10], dtype=np.int32) - strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) + strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=str) lib.put_vector(dataset, column=lib["GMT_X"], vector=x) lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) lib.put_strings( diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index 42284b08c78..6277f80e153 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -153,7 +153,7 @@ def test_open_virtual_file(): pytest.param(np.array, {"dtype": np.object_}, id="object"), pytest.param( getattr(pa, "array", None), - {"type": "string"}, # pa.string() + {}, # pa.string() marks=skip_if_no(package="pyarrow"), id="pyarrow", ), From 7b002488b7c46b9c793c6e522ae87312a020b2c0 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:27:58 +1300 Subject: [PATCH 16/25] Reduce diff from messy revert handling --- pygmt/clib/session.py | 31 ++++++++++++++-------------- pygmt/tests/test_clib_put_strings.py | 2 +- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 7229ea170d8..b8d6ff6feb1 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1001,31 +1001,30 @@ def put_vector(self, dataset: ctp.c_void_p, column: int, vector: np.ndarray): def put_strings(self, dataset: ctp.c_void_p, family: str, strings: np.ndarray): """ - Attach a numpy 1-D array of dtype str as a column on a GMT dataset. + Attach a 1-D numpy array of dtype str as a column on a GMT dataset. - Use this function to attach string type numpy array data to a GMT - dataset and pass it to GMT modules. Wraps ``GMT_Put_Strings``. + Use this function to attach string type numpy array data to a GMT dataset and + pass it to GMT modules. Wraps ``GMT_Put_Strings``. - The dataset must be created by :meth:`pygmt.clib.Session.create_data` - first. + The dataset must be created by :meth:`pygmt.clib.Session.create_data` first. .. warning:: - The numpy array must be C contiguous in memory. If it comes from a - column slice of a 2-D array, for example, you will have to make a - copy. Use :func:`numpy.ascontiguousarray` to make sure your vector - is contiguous (it won't copy if it already is). + The numpy array must be C contiguous in memory. If it comes from a column + slice of a 2-D array, for example, you will have to make a copy. Use + :func:`numpy.ascontiguousarray` to make sure your vector is contiguous (it + won't copy if it already is). Parameters ---------- - dataset : :class:`ctypes.c_void_p` - The ctypes void pointer to a ``GMT_Dataset``. Create it with - :meth:`pygmt.clib.Session.create_data`. - family : str + dataset + The ctypes void pointer to a ``GMT_VECTOR``/``GMT_MATRIX`` data container. + Create it with :meth:`pygmt.clib.Session.create_data`. + family The family type of the dataset. Can be either ``GMT_IS_VECTOR`` or ``GMT_IS_MATRIX``. - strings : numpy 1-D array - The array that will be attached to the dataset. Must be a 1-D C - contiguous array. + strings + The array that will be attached to the dataset. Must be a 1-D C contiguous + array. Raises ------ diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index bd2687bb7c0..81b27b06117 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -24,7 +24,7 @@ def test_put_strings(): ) x = np.array([1, 2, 3, 4, 5], dtype=np.int32) y = np.array([6, 7, 8, 9, 10], dtype=np.int32) - strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=str) + strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=np.str_) lib.put_vector(dataset, column=lib["GMT_X"], vector=x) lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) lib.put_strings( From 7dc353b61ee1c9393d87c79e91c23ab03fe30a93 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:31:55 +1300 Subject: [PATCH 17/25] Revert support of pyarrow.array inputs to put_strings --- pygmt/clib/session.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index b8d6ff6feb1..10c8770adaa 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1029,8 +1029,7 @@ def put_strings(self, dataset: ctp.c_void_p, family: str, strings: np.ndarray): Raises ------ GMTCLibError - If given invalid input or ``GMT_Put_Strings`` exits with - status != 0. + If given invalid input or ``GMT_Put_Strings`` exits with a non-zero status. """ c_put_strings = self.get_libgmt_func( "GMT_Put_Strings", @@ -1051,8 +1050,7 @@ def put_strings(self, dataset: ctp.c_void_p, family: str, strings: np.ndarray): self.session_pointer, family_int, dataset, strings_pointer ) if status != 0: - dtype = strings.dtype if hasattr(strings, "dtype") else type(strings) - msg = f"Failed to put strings of type {dtype} into dataset." + msg = f"Failed to put strings of type {strings.dtype} into dataset." raise GMTCLibError(msg) def put_matrix(self, dataset: ctp.c_void_p, matrix: np.ndarray, pad: int = 0): From ce761520995677a5193007a4502fdb438ed2680a Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:44:00 +1300 Subject: [PATCH 18/25] Remove StringArrayTypes type hint Reverts 0a6cda5285f68b607e0fc49436dc76c84f00e598, f59f93cf86d9f48ba4a8d7c310f73f5716391539, and 17c1e9ca28acbb9f35223b0bb770a8dc8bb75843 --- pygmt/_typing.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pygmt/_typing.py b/pygmt/_typing.py index 4a57c3c7678..bbc7d596c65 100644 --- a/pygmt/_typing.py +++ b/pygmt/_typing.py @@ -2,17 +2,7 @@ Type aliases for type hints. """ -import contextlib -import importlib -from collections.abc import Sequence from typing import Literal -import numpy as np - # Anchor codes AnchorCode = Literal["TL", "TC", "TR", "ML", "MC", "MR", "BL", "BC", "BR"] - -# String array types -StringArrayTypes = Sequence[str] | np.ndarray -with contextlib.suppress(ImportError): - StringArrayTypes |= importlib.import_module(name="pyarrow").StringArray From ef431afd065f3c8af35d94c6c8c62f301bf698c7 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:52:13 +1300 Subject: [PATCH 19/25] Revert "Remove StringArrayTypes type hint" This reverts commit ce761520995677a5193007a4502fdb438ed2680a. --- pygmt/_typing.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pygmt/_typing.py b/pygmt/_typing.py index bbc7d596c65..4a57c3c7678 100644 --- a/pygmt/_typing.py +++ b/pygmt/_typing.py @@ -2,7 +2,17 @@ Type aliases for type hints. """ +import contextlib +import importlib +from collections.abc import Sequence from typing import Literal +import numpy as np + # Anchor codes AnchorCode = Literal["TL", "TC", "TR", "ML", "MC", "MR", "BL", "BC", "BR"] + +# String array types +StringArrayTypes = Sequence[str] | np.ndarray +with contextlib.suppress(ImportError): + StringArrayTypes |= importlib.import_module(name="pyarrow").StringArray From acaf3509b169a18b62d919fd01adc83dba7c7937 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:54:53 +1300 Subject: [PATCH 20/25] Improve type-hint of text parameter in pygmt.Figure.text Include pyarrow.StringArray besides Sequence[str] or np.ndarray. --- pygmt/src/text.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygmt/src/text.py b/pygmt/src/text.py index 2ed475c9ac2..c15c6b2e499 100644 --- a/pygmt/src/text.py +++ b/pygmt/src/text.py @@ -5,7 +5,7 @@ from collections.abc import Sequence import numpy as np -from pygmt._typing import AnchorCode +from pygmt._typing import AnchorCode, StringArrayTypes from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( @@ -48,7 +48,7 @@ def text_( # noqa: PLR0912 x=None, y=None, position: AnchorCode | None = None, - text=None, + text: str | StringArrayTypes = None, angle=None, font=None, justify: bool | None | AnchorCode | Sequence[AnchorCode] = None, @@ -104,7 +104,7 @@ def text_( # noqa: PLR0912 For example, ``position="TL"`` plots the text at the Top Left corner of the map. - text : str or 1-D array + text The text string, or an array of strings to plot on the figure. angle: float, str, bool or list Set the angle measured in degrees counter-clockwise from From 6ad6eb96d7d0a0bcbe8e01386d80059a6d70a104 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:58:44 +1300 Subject: [PATCH 21/25] Move pa.array parametrizations to another file The test_virtualfile_from_vectors_one_string_or_object_column unit test was moved to test_clib_virtualfile_from_vectors.py. Xref https://github.com/GenericMappingTools/pygmt/pull/3512 --- .../test_clib_virtualfile_from_vectors.py | 25 ++++++++++---- pygmt/tests/test_clib_virtualfiles.py | 34 ------------------- 2 files changed, 19 insertions(+), 40 deletions(-) diff --git a/pygmt/tests/test_clib_virtualfile_from_vectors.py b/pygmt/tests/test_clib_virtualfile_from_vectors.py index 041bc7a803c..453f222b6bc 100644 --- a/pygmt/tests/test_clib_virtualfile_from_vectors.py +++ b/pygmt/tests/test_clib_virtualfile_from_vectors.py @@ -53,17 +53,30 @@ def test_virtualfile_from_vectors(dtypes): @pytest.mark.benchmark -@pytest.mark.parametrize("dtype", [str, object]) -def test_virtualfile_from_vectors_one_string_or_object_column(dtype): - """ - Test passing in one column with string or object dtype into virtual file dataset. +@pytest.mark.parametrize( + ("array_func", "dtype"), + [ + pytest.param(np.array, {"dtype": np.str_}, id="str"), + pytest.param(np.array, {"dtype": np.object_}, id="object"), + pytest.param( + getattr(pa, "array", None), + {}, # pa.string() + marks=skip_if_no(package="pyarrow"), + id="pyarrow", + ), + ], +) +def test_virtualfile_from_vectors_one_string_or_object_column(array_func, dtype): + """ + Test passing in one column with string (numpy/pyarrow) or object (numpy) + dtype into virtual file dataset. """ size = 5 x = np.arange(size, dtype=np.int32) y = np.arange(size, size * 2, 1, dtype=np.int32) - strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype) + strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) with clib.Session() as lib: - with lib.virtualfile_from_vectors((x, y, strings)) as vfile: + with lib.virtualfile_from_vectors(x, y, strings) as vfile: with GMTTempFile() as outfile: lib.call_module("convert", [vfile, f"->{outfile.name}"]) output = outfile.read(keep_tabs=True) diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index 6277f80e153..c747f763024 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -143,37 +143,3 @@ def test_open_virtual_file(): bounds = "\t".join([f"<{col.min():.0f}/{col.max():.0f}>" for col in data.T]) expected = f": N = {shape[0]}\t{bounds}\n" assert output == expected - - -@pytest.mark.benchmark -@pytest.mark.parametrize( - ("array_func", "dtype"), - [ - pytest.param(np.array, {"dtype": np.str_}, id="str"), - pytest.param(np.array, {"dtype": np.object_}, id="object"), - pytest.param( - getattr(pa, "array", None), - {}, # pa.string() - marks=skip_if_no(package="pyarrow"), - id="pyarrow", - ), - ], -) -def test_virtualfile_from_vectors_one_string_or_object_column(array_func, dtype): - """ - Test passing in one column with string (numpy/pyarrow) or object (numpy) - dtype into virtual file dataset. - """ - size = 5 - x = np.arange(size, dtype=np.int32) - y = np.arange(size, size * 2, 1, dtype=np.int32) - strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) - with clib.Session() as lib: - with lib.virtualfile_from_vectors(x, y, strings) as vfile: - with GMTTempFile() as outfile: - lib.call_module("convert", [vfile, f"->{outfile.name}"]) - output = outfile.read(keep_tabs=True) - expected = "".join( - f"{i}\t{j}\t{k}\n" for i, j, k in zip(x, y, strings, strict=True) - ) - assert output == expected From d88accd2aa8ea47a61730fb2edc44d022deaf7d4 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:00:38 +1300 Subject: [PATCH 22/25] Pass a tuple of vectors to virtualfile_from_vectors --- pygmt/tests/test_clib_virtualfile_from_vectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_virtualfile_from_vectors.py b/pygmt/tests/test_clib_virtualfile_from_vectors.py index 453f222b6bc..238a0f5cb9c 100644 --- a/pygmt/tests/test_clib_virtualfile_from_vectors.py +++ b/pygmt/tests/test_clib_virtualfile_from_vectors.py @@ -76,7 +76,7 @@ def test_virtualfile_from_vectors_one_string_or_object_column(array_func, dtype) y = np.arange(size, size * 2, 1, dtype=np.int32) strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) with clib.Session() as lib: - with lib.virtualfile_from_vectors(x, y, strings) as vfile: + with lib.virtualfile_from_vectors(vectors=(x, y, strings)) as vfile: with GMTTempFile() as outfile: lib.call_module("convert", [vfile, f"->{outfile.name}"]) output = outfile.read(keep_tabs=True) From 265132ece545279bdf2e3734f9a4e736b357150f Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:14:23 +1300 Subject: [PATCH 23/25] Move skip_if_no and pyarrow import to test_clib_virtualfile_from_vectors --- pygmt/tests/test_clib_virtualfile_from_vectors.py | 6 ++++++ pygmt/tests/test_clib_virtualfiles.py | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pygmt/tests/test_clib_virtualfile_from_vectors.py b/pygmt/tests/test_clib_virtualfile_from_vectors.py index 238a0f5cb9c..55702f114b6 100644 --- a/pygmt/tests/test_clib_virtualfile_from_vectors.py +++ b/pygmt/tests/test_clib_virtualfile_from_vectors.py @@ -11,6 +11,12 @@ from pygmt.clib.session import DTYPES_NUMERIC from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import GMTTempFile +from pygmt.helpers.testing import skip_if_no + +try: + import pyarrow as pa +except ImportError: + pa = None @pytest.fixture(scope="module", name="dtypes") diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index c747f763024..a45a662de71 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -10,14 +10,8 @@ from pygmt.clib.session import DTYPES_NUMERIC from pygmt.exceptions import GMTCLibError, GMTInvalidInput from pygmt.helpers import GMTTempFile -from pygmt.helpers.testing import skip_if_no from pygmt.tests.test_clib import mock -try: - import pyarrow as pa -except ImportError: - pa = None - POINTS_DATA = Path(__file__).parent / "data" / "points.txt" From edb3438e81f0ac261973cc006dd0a40df366ffb0 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:15:31 +1300 Subject: [PATCH 24/25] The text argument can be None --- pygmt/src/text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/text.py b/pygmt/src/text.py index c15c6b2e499..ad98711824b 100644 --- a/pygmt/src/text.py +++ b/pygmt/src/text.py @@ -48,7 +48,7 @@ def text_( # noqa: PLR0912 x=None, y=None, position: AnchorCode | None = None, - text: str | StringArrayTypes = None, + text: str | StringArrayTypes | None = None, angle=None, font=None, justify: bool | None | AnchorCode | Sequence[AnchorCode] = None, From 8172102c4ea460e0dcc3de8a79cdc0cdf77de5fa Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:32:05 +1300 Subject: [PATCH 25/25] Simplify to remove getattr(pa, "array", None) call Co-Authored-By: Dongdong Tian --- pygmt/tests/test_clib_virtualfile_from_vectors.py | 8 +++++--- pygmt/tests/test_text.py | 10 ++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pygmt/tests/test_clib_virtualfile_from_vectors.py b/pygmt/tests/test_clib_virtualfile_from_vectors.py index 55702f114b6..b76a9bfe168 100644 --- a/pygmt/tests/test_clib_virtualfile_from_vectors.py +++ b/pygmt/tests/test_clib_virtualfile_from_vectors.py @@ -15,8 +15,10 @@ try: import pyarrow as pa + + pa_array = pa.array except ImportError: - pa = None + pa_array = None @pytest.fixture(scope="module", name="dtypes") @@ -65,8 +67,8 @@ def test_virtualfile_from_vectors(dtypes): pytest.param(np.array, {"dtype": np.str_}, id="str"), pytest.param(np.array, {"dtype": np.object_}, id="object"), pytest.param( - getattr(pa, "array", None), - {}, # pa.string() + pa_array, + {}, # {"type": pa.string()} marks=skip_if_no(package="pyarrow"), id="pyarrow", ), diff --git a/pygmt/tests/test_text.py b/pygmt/tests/test_text.py index ce59d7d4bd6..593c07a7b4d 100644 --- a/pygmt/tests/test_text.py +++ b/pygmt/tests/test_text.py @@ -13,8 +13,10 @@ try: import pyarrow as pa + + pa_array = pa.array except ImportError: - pa = None + pa_array = None TEST_DATA_DIR = Path(__file__).parent / "data" POINTS_DATA = TEST_DATA_DIR / "points.txt" @@ -60,11 +62,7 @@ def test_text_single_line_of_text(region, projection): [ list, pytest.param(np.array, id="numpy"), - pytest.param( - getattr(pa, "array", None), - marks=skip_if_no(package="pyarrow"), - id="pyarrow", - ), + pytest.param(pa_array, marks=skip_if_no(package="pyarrow"), id="pyarrow"), ], ) def test_text_multiple_lines_of_text(region, projection, array_func):