GenericMappingTools · seisman · Mar 11, 2024 · Mar 7, 2024 · Mar 7, 2024 · Mar 7, 2024
diff --git a/doc/api/index.rst b/doc/api/index.rst
@@ -294,6 +294,7 @@ conversion of Python variables to GMT virtual files:
     clib.Session.virtualfile_from_grid
     clib.Session.virtualfile_in
     clib.Session.virtualfile_out
+    clib.Session.virtualfile_to_dataset
 
 Low level access (these are mostly used by the :mod:`pygmt.clib` package):
 

diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py
@@ -1738,6 +1738,127 @@ def read_virtualfile(
         dtype = {"dataset": _GMT_DATASET, "grid": _GMT_GRID}[kind]
         return ctp.cast(pointer, ctp.POINTER(dtype))
 
+    def virtualfile_to_dataset(
+        self,
+        output_type: Literal["pandas", "numpy", "file"],
+        vfile: str,
+        column_names: list[str] | None = None,
+    ) -> pd.DataFrame | np.ndarray | None:
+        """
+        Output a tabular dataset stored in a virtual file to a different format.
+
+        The format of the dataset is determined by the ``output_type`` parameter.
+
+        Parameters
+        ----------
+        output_type
+            Desired output type of the result data.
+
+            - ``"pandas"`` will return a :class:`pandas.DataFrame` object.
+            - ``"numpy"`` will return a :class:`numpy.ndarray` object.
+            - ``"file"`` means the result was saved to a file and will return ``None``.
+        vfile
+            The virtual file name that stores the result data. Required for ``"pandas"``
+            and ``"numpy"`` output type.
+        column_names
+            The column names for the :class:`pandas.DataFrame` output.
+
+        Returns
+        -------
+        result
+            The result dataset. If ``output_type="file"`` returns ``None``.
+
+        Examples
+        --------
+        >>> from pathlib import Path
+        >>> import numpy as np
+        >>> import pandas as pd
+        >>>
+        >>> from pygmt.helpers import GMTTempFile
+        >>> from pygmt.clib import Session
+        >>>
+        >>> with GMTTempFile(suffix=".txt") as tmpfile:
+        ...     # prepare the sample data file
+        ...     with open(tmpfile.name, mode="w") as fp:
+        ...         print(">", file=fp)
+        ...         print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
+        ...         print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
+        ...         print(">", file=fp)
+        ...         print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp)
+        ...         print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp)
+        ...
+        ...     # file output
+        ...     with Session() as lib:
+        ...         with GMTTempFile(suffix=".txt") as outtmp:
+        ...             with lib.virtualfile_out(
+        ...                 kind="dataset", fname=outtmp.name
+        ...             ) as vouttbl:
+        ...                 lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
+        ...                 result = lib.virtualfile_to_dataset(
+        ...                     output_type="file", vfile=vouttbl
+        ...                 )
+        ...                 assert result is None
+        ...                 assert Path(outtmp.name).stat().st_size > 0
+        ...
+        ...     # numpy output
+        ...     with Session() as lib:
+        ...         with lib.virtualfile_out(kind="dataset") as vouttbl:
+        ...             lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
+        ...             outnp = lib.virtualfile_to_dataset(
+        ...                 output_type="numpy", vfile=vouttbl
+        ...             )
+        ...     assert isinstance(outnp, np.ndarray)
+        ...
+        ...     # pandas output
+        ...     with Session() as lib:
+        ...         with lib.virtualfile_out(kind="dataset") as vouttbl:
+        ...             lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
+        ...             outpd = lib.virtualfile_to_dataset(
+        ...                 output_type="pandas", vfile=vouttbl
+        ...             )
+        ...     assert isinstance(outpd, pd.DataFrame)
+        ...
+        ...     # pandas output with specified column names
+        ...     with Session() as lib:
+        ...         with lib.virtualfile_out(kind="dataset") as vouttbl:
+        ...             lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
+        ...             outpd2 = lib.virtualfile_to_dataset(
+        ...                 output_type="pandas",
+        ...                 vfile=vouttbl,
+        ...                 column_names=["col1", "col2", "col3", "coltext"],
+        ...             )
+        ...     assert isinstance(outpd2, pd.DataFrame)
+        >>> outnp
+        array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'],
+               [4.0, 5.0, 6.0, 'TEXT4 TEXT567'],
+               [7.0, 8.0, 9.0, 'TEXT8 TEXT90'],
+               [10.0, 11.0, 12.0, 'TEXT123 TEXT456789']], dtype=object)
+        >>> outpd
+              0     1     2                   3
+        0   1.0   2.0   3.0        TEXT1 TEXT23
+        1   4.0   5.0   6.0       TEXT4 TEXT567
+        2   7.0   8.0   9.0        TEXT8 TEXT90
+        3  10.0  11.0  12.0  TEXT123 TEXT456789
+        >>> outpd2
+           col1  col2  col3             coltext
+        0   1.0   2.0   3.0        TEXT1 TEXT23
+        1   4.0   5.0   6.0       TEXT4 TEXT567
+        2   7.0   8.0   9.0        TEXT8 TEXT90
+        3  10.0  11.0  12.0  TEXT123 TEXT456789
+        """
+        if output_type == "file":  # Already written to file, so return None
+            return None
+
+        # Read the virtual file as a GMT dataset and convert to pandas.DataFrame
+        result = self.read_virtualfile(vfile, kind="dataset").contents.to_dataframe()
+        if output_type == "numpy":  # numpy.ndarray output
+            return result.to_numpy()
+
+        # Assign column names
+        if column_names is not None:
+            result.columns = column_names
+        return result  # pandas.DataFrame output
+
     def extract_region(self):
         """
         Extract the WESN bounding box of the currently active figure.