diff --git a/doc/api/index.rst b/doc/api/index.rst index 8758ef10423..547ab15efb2 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -283,8 +283,8 @@ the :meth:`~pygmt.clib.Session.call_module` method: Passing memory blocks between Python data objects (e.g. :class:`numpy.ndarray`, :class:`pandas.Series`, :class:`xarray.DataArray`, etc) and GMT happens through -*virtual files*. These methods are context managers that automate the -conversion of Python variables to GMT virtual files: +*virtual files*. These methods are context managers that automate the conversion of +Python objects to and from GMT virtual files: .. autosummary:: :toctree: generated @@ -294,6 +294,7 @@ conversion of Python variables to GMT virtual files: clib.Session.virtualfile_from_grid clib.Session.virtualfile_in clib.Session.virtualfile_out + clib.Session.virtualfile_to_dataset Low level access (these are mostly used by the :mod:`pygmt.clib` package): diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index d52f66501af..e2feb9cf857 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1738,6 +1738,127 @@ def read_virtualfile( dtype = {"dataset": _GMT_DATASET, "grid": _GMT_GRID}[kind] return ctp.cast(pointer, ctp.POINTER(dtype)) + def virtualfile_to_dataset( + self, + output_type: Literal["pandas", "numpy", "file"], + vfname: str, + column_names: list[str] | None = None, + ) -> pd.DataFrame | np.ndarray | None: + """ + Output a tabular dataset stored in a virtual file to a different format. + + The format of the dataset is determined by the ``output_type`` parameter. + + Parameters + ---------- + output_type + Desired output type of the result data. + + - ``"pandas"`` will return a :class:`pandas.DataFrame` object. + - ``"numpy"`` will return a :class:`numpy.ndarray` object. + - ``"file"`` means the result was saved to a file and will return ``None``. + vfname + The virtual file name that stores the result data. Required for ``"pandas"`` + and ``"numpy"`` output type. + column_names + The column names for the :class:`pandas.DataFrame` output. + + Returns + ------- + result + The result dataset. If ``output_type="file"`` returns ``None``. + + Examples + -------- + >>> from pathlib import Path + >>> import numpy as np + >>> import pandas as pd + >>> + >>> from pygmt.helpers import GMTTempFile + >>> from pygmt.clib import Session + >>> + >>> with GMTTempFile(suffix=".txt") as tmpfile: + ... # prepare the sample data file + ... with open(tmpfile.name, mode="w") as fp: + ... print(">", file=fp) + ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) + ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) + ... print(">", file=fp) + ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) + ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) + ... + ... # file output + ... with Session() as lib: + ... with GMTTempFile(suffix=".txt") as outtmp: + ... with lib.virtualfile_out( + ... kind="dataset", fname=outtmp.name + ... ) as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... result = lib.virtualfile_to_dataset( + ... output_type="file", vfname=vouttbl + ... ) + ... assert result is None + ... assert Path(outtmp.name).stat().st_size > 0 + ... + ... # numpy output + ... with Session() as lib: + ... with lib.virtualfile_out(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... outnp = lib.virtualfile_to_dataset( + ... output_type="numpy", vfname=vouttbl + ... ) + ... assert isinstance(outnp, np.ndarray) + ... + ... # pandas output + ... with Session() as lib: + ... with lib.virtualfile_out(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... outpd = lib.virtualfile_to_dataset( + ... output_type="pandas", vfname=vouttbl + ... ) + ... assert isinstance(outpd, pd.DataFrame) + ... + ... # pandas output with specified column names + ... with Session() as lib: + ... with lib.virtualfile_out(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... outpd2 = lib.virtualfile_to_dataset( + ... output_type="pandas", + ... vfname=vouttbl, + ... column_names=["col1", "col2", "col3", "coltext"], + ... ) + ... assert isinstance(outpd2, pd.DataFrame) + >>> outnp + array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'], + [4.0, 5.0, 6.0, 'TEXT4 TEXT567'], + [7.0, 8.0, 9.0, 'TEXT8 TEXT90'], + [10.0, 11.0, 12.0, 'TEXT123 TEXT456789']], dtype=object) + >>> outpd + 0 1 2 3 + 0 1.0 2.0 3.0 TEXT1 TEXT23 + 1 4.0 5.0 6.0 TEXT4 TEXT567 + 2 7.0 8.0 9.0 TEXT8 TEXT90 + 3 10.0 11.0 12.0 TEXT123 TEXT456789 + >>> outpd2 + col1 col2 col3 coltext + 0 1.0 2.0 3.0 TEXT1 TEXT23 + 1 4.0 5.0 6.0 TEXT4 TEXT567 + 2 7.0 8.0 9.0 TEXT8 TEXT90 + 3 10.0 11.0 12.0 TEXT123 TEXT456789 + """ + if output_type == "file": # Already written to file, so return None + return None + + # Read the virtual file as a GMT dataset and convert to pandas.DataFrame + result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe() + if output_type == "numpy": # numpy.ndarray output + return result.to_numpy() + + # Assign column names + if column_names is not None: + result.columns = column_names + return result # pandas.DataFrame output + def extract_region(self): """ Extract the WESN bounding box of the currently active figure.