-
Notifications
You must be signed in to change notification settings - Fork 219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
clib: Add virtualfile_to_dataset method for converting virtualfile to a dataset #3083
Changes from 10 commits
6827c82
bd166fe
d376a74
193bd05
c9e482a
ce029b2
9640c26
9f9f08d
796f1cc
711142c
aee0499
cd5b31d
bc7d844
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1738,6 +1738,127 @@ def read_virtualfile( | |||||
dtype = {"dataset": _GMT_DATASET, "grid": _GMT_GRID}[kind] | ||||||
return ctp.cast(pointer, ctp.POINTER(dtype)) | ||||||
|
||||||
def virtualfile_to_dataset( | ||||||
self, | ||||||
output_type: Literal["pandas", "numpy", "file"], | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we set a default output type here? It looks like we're using
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It makes no difference because we always call the function with the
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it doesn't make any difference in the PyGMT modules, but this is a good central location to document that There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, not saying that |
||||||
vfile: str, | ||||||
column_names: list[str] | None = None, | ||||||
) -> pd.DataFrame | np.ndarray | None: | ||||||
""" | ||||||
Output a tabular dataset stored in a virtual file to a different format. | ||||||
|
||||||
The format of the dataset is determined by the ``output_type`` parameter. | ||||||
|
||||||
Parameters | ||||||
---------- | ||||||
output_type | ||||||
Desired output type of the result data. | ||||||
|
||||||
- ``"pandas"`` will return a :class:`pandas.DataFrame` object. | ||||||
- ``"numpy"`` will return a :class:`numpy.ndarray` object. | ||||||
- ``"file"`` means the result was saved to a file and will return ``None``. | ||||||
vfile | ||||||
The virtual file name that stores the result data. Required for ``"pandas"`` | ||||||
and ``"numpy"`` output type. | ||||||
column_names | ||||||
The column names for the :class:`pandas.DataFrame` output. | ||||||
|
||||||
Returns | ||||||
------- | ||||||
result | ||||||
The result dataset. If ``output_type="file"`` returns ``None``. | ||||||
|
||||||
Examples | ||||||
-------- | ||||||
>>> from pathlib import Path | ||||||
>>> import numpy as np | ||||||
>>> import pandas as pd | ||||||
>>> | ||||||
>>> from pygmt.helpers import GMTTempFile | ||||||
>>> from pygmt.clib import Session | ||||||
>>> | ||||||
>>> with GMTTempFile(suffix=".txt") as tmpfile: | ||||||
... # prepare the sample data file | ||||||
... with open(tmpfile.name, mode="w") as fp: | ||||||
... print(">", file=fp) | ||||||
... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) | ||||||
... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) | ||||||
... print(">", file=fp) | ||||||
... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) | ||||||
... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) | ||||||
... | ||||||
... # file output | ||||||
... with Session() as lib: | ||||||
... with GMTTempFile(suffix=".txt") as outtmp: | ||||||
... with lib.virtualfile_out( | ||||||
... kind="dataset", fname=outtmp.name | ||||||
... ) as vouttbl: | ||||||
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") | ||||||
... result = lib.virtualfile_to_dataset( | ||||||
... output_type="file", vfile=vouttbl | ||||||
... ) | ||||||
... assert result is None | ||||||
... assert Path(outtmp.name).stat().st_size > 0 | ||||||
... | ||||||
... # numpy output | ||||||
... with Session() as lib: | ||||||
... with lib.virtualfile_out(kind="dataset") as vouttbl: | ||||||
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") | ||||||
... outnp = lib.virtualfile_to_dataset( | ||||||
... output_type="numpy", vfile=vouttbl | ||||||
... ) | ||||||
... assert isinstance(outnp, np.ndarray) | ||||||
... | ||||||
... # pandas output | ||||||
... with Session() as lib: | ||||||
... with lib.virtualfile_out(kind="dataset") as vouttbl: | ||||||
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") | ||||||
... outpd = lib.virtualfile_to_dataset( | ||||||
... output_type="pandas", vfile=vouttbl | ||||||
... ) | ||||||
... assert isinstance(outpd, pd.DataFrame) | ||||||
... | ||||||
... # pandas output with specified column names | ||||||
... with Session() as lib: | ||||||
... with lib.virtualfile_out(kind="dataset") as vouttbl: | ||||||
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") | ||||||
... outpd2 = lib.virtualfile_to_dataset( | ||||||
... output_type="pandas", | ||||||
... vfile=vouttbl, | ||||||
... column_names=["col1", "col2", "col3", "coltext"], | ||||||
... ) | ||||||
... assert isinstance(outpd2, pd.DataFrame) | ||||||
>>> outnp | ||||||
array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'], | ||||||
[4.0, 5.0, 6.0, 'TEXT4 TEXT567'], | ||||||
[7.0, 8.0, 9.0, 'TEXT8 TEXT90'], | ||||||
[10.0, 11.0, 12.0, 'TEXT123 TEXT456789']], dtype=object) | ||||||
>>> outpd | ||||||
0 1 2 3 | ||||||
0 1.0 2.0 3.0 TEXT1 TEXT23 | ||||||
1 4.0 5.0 6.0 TEXT4 TEXT567 | ||||||
2 7.0 8.0 9.0 TEXT8 TEXT90 | ||||||
3 10.0 11.0 12.0 TEXT123 TEXT456789 | ||||||
>>> outpd2 | ||||||
col1 col2 col3 coltext | ||||||
0 1.0 2.0 3.0 TEXT1 TEXT23 | ||||||
1 4.0 5.0 6.0 TEXT4 TEXT567 | ||||||
2 7.0 8.0 9.0 TEXT8 TEXT90 | ||||||
3 10.0 11.0 12.0 TEXT123 TEXT456789 | ||||||
""" | ||||||
if output_type == "file": # Already written to file, so return None | ||||||
return None | ||||||
|
||||||
# Read the virtual file as a GMT dataset and convert to pandas.DataFrame | ||||||
result = self.read_virtualfile(vfile, kind="dataset").contents.to_dataframe() | ||||||
if output_type == "numpy": # numpy.ndarray output | ||||||
return result.to_numpy() | ||||||
|
||||||
# Assign column names | ||||||
if column_names is not None: | ||||||
result.columns = column_names | ||||||
return result # pandas.DataFrame output | ||||||
|
||||||
def extract_region(self): | ||||||
""" | ||||||
Extract the WESN bounding box of the currently active figure. | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At L287 above, could you change the sentence to read "These methods are context managers that automate the conversion of Python variables to and from GMT virtual files"? We can now convert GMT virtual files to Python objects as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in aee0499.