Skip to content

Commit

Permalink
153 add a new "create vgp features from gpmdb" command (#155)
Browse files Browse the repository at this point in the history
* add a new subcommand -- gpmdb

* future-proof column names

* update gpmdb description
  • Loading branch information
michaelchin authored Dec 10, 2023
1 parent 0d39ae7 commit e50b40a
Show file tree
Hide file tree
Showing 2 changed files with 292 additions and 0 deletions.
9 changes: 9 additions & 0 deletions gplately/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
convert_xy_to_gplates,
diagnose_rotations,
fix_crossovers,
gpmdb,
remove_plate_rotations,
resolve_topologies,
rotation_tools,
Expand Down Expand Up @@ -237,6 +238,14 @@ def main():
)
subduction_convergence.add_arguments(subduction_convergence_cmd)

# add gpmdb sub-command
gpmdb_cmd = subparser.add_parser(
"gpmdb",
help="retrieve data from https://www.gpmdb.net and create VGP features",
add_help=True,
)
gpmdb.add_arguments(gpmdb_cmd)

# combine command arguments
combine_cmd.set_defaults(func=_run_combine_feature_collections)
combine_cmd.add_argument("combine_first_input_file", type=str)
Expand Down
283 changes: 283 additions & 0 deletions gplately/ptt/gpmdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
import argparse
import json
import math
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import pygplates
import requests
from plate_model_manager import PlateModelManager

# you need pygplates and pandas to run this script
# using gplately env is recommended
# `micromamba activate gplately`

DATA_CACHE_DIR = "data-cache"
QUERY_DATA_URL = "https://www.gpmdb.net/get_query_data/"
QUERY_DATA_FILENAME = "query-data.json"
PMAG_RESULT_URL = "https://gpmdb.net/get_PMAGRESULT_data/?fmt=json"
PMAG_RESULT_FILENAME = "pmag-result.json"


def create_vgp_feature(
    RESULTNO,
    PLAT,
    PLONG,
    INC,
    DEC,
    DP,
    DM,
    LOMAGAGE,
    HIMAGAGE,
    LOWAGE,
    HIGHAGE,
    sample_site_position,
    plate_id,
):
    """Build a single gpml:VirtualGeomagneticPole feature from one GPMDB row.

    The feature is named after RESULTNO, carries the sample-site and pole
    positions, the dp/dm uncertainties, average inclination/declination,
    an average age of (LOMAGAGE + HIMAGAGE) / 2, a valid-time span of
    [HIGHAGE, LOWAGE] and the given reconstruction plate ID.
    """
    # Paper reference (DOI) recorded as the feature description.
    PAPER_URL = "https://doi.org/10.1016/j.earscirev.2022.104258"

    feature = pygplates.Feature(pygplates.FeatureType.gpml_virtual_geomagnetic_pole)
    feature.set_name(str(RESULTNO))
    feature.set_description(PAPER_URL)

    # (property name, property value) pairs applied in order.
    vgp_properties = [
        (
            pygplates.PropertyName.gpml_average_sample_site_position,
            pygplates.GmlPoint(sample_site_position),
        ),
        (
            pygplates.PropertyName.gpml_pole_position,
            pygplates.GmlPoint(pygplates.PointOnSphere(PLAT, PLONG)),
        ),
        # NOTE(review): A95 is computed as sqrt(DP + DM) here; the commonly
        # used approximation is the geometric mean sqrt(DP * DM) — confirm
        # the intended formula with the data provider.
        (
            pygplates.PropertyName.gpml_pole_a95,
            pygplates.XsDouble(math.sqrt(DP + DM)),
        ),
        (pygplates.PropertyName.gpml_pole_dp, pygplates.XsDouble(DP)),
        (pygplates.PropertyName.gpml_pole_dm, pygplates.XsDouble(DM)),
        (pygplates.PropertyName.gpml_average_inclination, pygplates.XsDouble(INC)),
        (pygplates.PropertyName.gpml_average_declination, pygplates.XsDouble(DEC)),
        (
            pygplates.PropertyName.gpml_average_age,
            pygplates.XsDouble((LOMAGAGE + HIMAGAGE) / 2.0),
        ),
    ]
    for property_name, property_value in vgp_properties:
        feature.set(property_name, property_value)

    feature.set_valid_time(HIGHAGE, LOWAGE)
    feature.set_reconstruction_plate_id(plate_id)

    return feature


def assign_plate_id(df, static_polygon_file, rotation_file):
    """Look up a reconstruction plate ID for every sample site in *df*.

    Builds a pygplates.PointOnSphere from each row's SLAT/SLONG columns and
    partitions it with the given static polygons and rotation model.

    Returns a (sites, pids) pair of equal-length lists; a site that falls in
    no static polygon gets plate ID 0.
    """
    partitioner = pygplates.PlatePartitioner(
        static_polygon_file,
        rotation_file,
    )
    sites = []
    pids = []
    for _, row in df.iterrows():
        site = pygplates.PointOnSphere(row["SLAT"], row["SLONG"])
        polygon = partitioner.partition_point(site)
        # Plate ID 0 marks sites outside every static polygon.
        pid = (
            polygon.get_feature().get_reconstruction_plate_id() if polygon else 0
        )
        sites.append(site)
        pids.append(pid)
    return sites, pids


class ArgParser(argparse.ArgumentParser):
    """ArgumentParser that prints the full help text on error before exiting."""

    def error(self, message):
        # Report the problem on stderr, show the help text, and exit non-zero
        # instead of argparse's default short usage-only message.
        sys.stderr.write("error: " + message + "\n")
        self.print_help()
        sys.exit(1)


def add_arguments(parser: argparse.ArgumentParser):
    """Attach the gpmdb sub-command's options and defaults to *parser*."""
    parser.formatter_class = argparse.RawDescriptionHelpFormatter
    parser.description = __description__

    # Dispatch to main() when this sub-command is selected.
    parser.set_defaults(func=main)

    # Reconstruction model used to assign plate IDs to the sample sites.
    parser.add_argument(
        "-m", "--model", type=str, dest="model_name", default="Muller2022"
    )
    # Output file name; main() falls back to vgp_features_{model}.gpmlz.
    parser.add_argument("-o", "--outfile", type=str, dest="outfile")


# Long help text shown by the argument parser (RawDescriptionHelpFormatter
# preserves the line breaks). Note: the output extension is .gpmlz, matching
# the file actually written by main().
__description__ = """Retrieve paleomagnetic data from https://www.gpmdb.net, create GPlates-compatible VGP features and save the VGP features in a .gpmlz file.
The two URLs being used are
- https://www.gpmdb.net/get_query_data/
- https://www.gpmdb.net/get_PMAGRESULT_data/?fmt=json
This command will create two files.
- the .gpmlz file -- contains the GPlates-compatible VGP features
- the pmag-result.csv -- contains the raw paleomagnetic data
Usage example: gplately gpmdb -m zahirovic2022 -o test.gpmlz
The default reconstruction model being used to assign plate IDs is "Muller2022". User can choose to specify the model with "-m/--model".
The available model names can be found with command `pmm ls` (plate-model-manager https://pypi.org/project/plate-model-manager/; use gplately conda env).
User can specify the output .gpmlz file name with "-o/--outfile". By default, the output file name will be "vgp_features_{model name}.gpmlz".
The `gplately gpmdb` will use model "Muller2022" and create the output file "vgp_features_Muller2022.gpmlz".
The file name for the raw paleomagnetic data is always "pmag-result.csv".
"""


def _fetch_json_cached(url, cache_filename):
    """Return the JSON payload from *url*, caching it under DATA_CACHE_DIR.

    A previously cached file is read back instead of re-downloading.
    """
    cache_path = f"{DATA_CACHE_DIR}/{cache_filename}"
    if os.path.isfile(cache_path):
        with open(cache_path, "r") as infile:
            return json.load(infile)
    # SECURITY NOTE(review): verify=False disables TLS certificate checking
    # (kept from the original implementation) — confirm whether the gpmdb.net
    # certificate can be validated so this can be removed.
    response = requests.get(url, verify=False)
    data = response.json()
    with open(cache_path, "w+") as outfile:
        outfile.write(json.dumps(data))
    return data


def _flatten_columns(raw_columns):
    """Flatten the GPMDB "columns" field (a mix of strings and lists of
    strings) into one flat list of column names.

    Raises an Exception for any entry that is neither a list nor a string.
    """
    columns = []
    for c in raw_columns:
        if isinstance(c, list):
            columns += c
        elif isinstance(c, str):
            columns.append(c)
        else:
            raise Exception(f"Invalid column type: {type(c)}")
    return columns


def _load_sorted_table(url, cache_filename):
    """Download (or read cached) a GPMDB table and return it as a DataFrame
    sorted by RESULTNO with a fresh index."""
    payload = _fetch_json_cached(url, cache_filename)
    df = pd.DataFrame(
        np.array(payload["data"]), columns=_flatten_columns(payload["columns"])
    )
    return df.sort_values(by=["RESULTNO"], ignore_index=True)


def main(args):
    """Fetch GPMDB paleomagnetic data, build VGP features and write outputs.

    Creates two files:
    - a .gpmlz feature collection (args.outfile, or
      "vgp_features_{args.model_name}.gpmlz" when not given), and
    - "pmag-result.csv" with the raw pmag-result table.

    Downloads are cached under DATA_CACHE_DIR; rows lacking DP, DM or INC
    are skipped (a count is printed).
    """
    Path(DATA_CACHE_DIR).mkdir(parents=True, exist_ok=True)

    df_query = _load_sorted_table(QUERY_DATA_URL, QUERY_DATA_FILENAME)
    df_pmagresult = _load_sorted_table(PMAG_RESULT_URL, PMAG_RESULT_FILENAME)

    # Both tables are sorted by RESULTNO above, so rows align positionally.
    df_pmagresult["LOWAGE"] = df_query["LOWAGE"]
    df_pmagresult["HIGHAGE"] = df_query["HIGHAGE"]

    df = df_pmagresult[
        [
            "RESULTNO",
            "SLAT",
            "SLONG",
            "PLAT",
            "PLONG",
            "INC",
            "DEC",
            "DP",
            "DM",
            "LOMAGAGE",
            "HIMAGAGE",
            "LOWAGE",
            "HIGHAGE",
        ]
    ]

    # Fetch the reconstruction model used to assign plate IDs to the sites.
    pm_manager = PlateModelManager()
    model = pm_manager.get_model(args.model_name, data_dir=".")

    sites, pids = assign_plate_id(
        df,
        static_polygon_file=model.get_static_polygons(),
        rotation_file=model.get_rotation_model(),
    )

    count = 0
    vgp_features = []
    for index, row in df.iterrows():
        # Rows missing DP, DM or INC cannot form a complete VGP feature.
        if row["DP"] is None or row["DM"] is None or row["INC"] is None:
            count += 1
            continue
        vgp_features.append(
            create_vgp_feature(
                RESULTNO=row["RESULTNO"],
                PLAT=row["PLAT"],
                PLONG=row["PLONG"],
                INC=row["INC"],
                DEC=row["DEC"],
                DP=row["DP"],
                DM=row["DM"],
                LOMAGAGE=row["LOMAGAGE"],
                HIMAGAGE=row["HIMAGAGE"],
                LOWAGE=row["LOWAGE"],
                HIGHAGE=row["HIGHAGE"],
                sample_site_position=sites[index],
                plate_id=pids[index],
            )
        )

    print(f"{count} rows have been ignored due to None values.")

    # Save VGP features to file.
    if not args.outfile:
        outfile_name = f"vgp_features_{args.model_name}.gpmlz"
    else:
        outfile_name = args.outfile

    pygplates.FeatureCollection(vgp_features).write(outfile_name)

    # Drop the ROCKUNITNO column before writing the raw table to CSV.
    df_pmagresult = df_pmagresult.drop(["ROCKUNITNO"], axis=1)
    df_pmagresult.to_csv("pmag-result.csv", index=False)

    print(
        f"The files {outfile_name} and pmag-result.csv have been created successfully."
    )


if __name__ == "__main__":
    # Standalone entry point: build a parser with the long description,
    # attach the gpmdb arguments, then run the command.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__description__,
    )
    add_arguments(parser)
    main(parser.parse_args())

0 comments on commit e50b40a

Please sign in to comment.