From e50b40acefd0a09199fe939e0ed3b0729d591362 Mon Sep 17 00:00:00 2001 From: michaelchin Date: Mon, 11 Dec 2023 10:41:24 +1100 Subject: [PATCH] 153 add a new "create vgp features from gpmdb" command (#155) * add a new subcommand -- gpmdb * future-proof column names * update gpmdb description --- gplately/__main__.py | 9 ++ gplately/ptt/gpmdb.py | 283 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 292 insertions(+) create mode 100644 gplately/ptt/gpmdb.py diff --git a/gplately/__main__.py b/gplately/__main__.py index ff7621ec..fdc8302c 100644 --- a/gplately/__main__.py +++ b/gplately/__main__.py @@ -13,6 +13,7 @@ convert_xy_to_gplates, diagnose_rotations, fix_crossovers, + gpmdb, remove_plate_rotations, resolve_topologies, rotation_tools, @@ -237,6 +238,14 @@ def main(): ) subduction_convergence.add_arguments(subduction_convergence_cmd) + # add gpmdb sub-command + gpmdb_cmd = subparser.add_parser( + "gpmdb", + help="retrieve data from https://www.gpmdb.net and create VGP features", + add_help=True, + ) + gpmdb.add_arguments(gpmdb_cmd) + # combine command arguments combine_cmd.set_defaults(func=_run_combine_feature_collections) combine_cmd.add_argument("combine_first_input_file", type=str) diff --git a/gplately/ptt/gpmdb.py b/gplately/ptt/gpmdb.py new file mode 100644 index 00000000..c8ea3e00 --- /dev/null +++ b/gplately/ptt/gpmdb.py @@ -0,0 +1,283 @@ +import argparse +import json +import math +import os +import sys +from pathlib import Path + +import numpy as np +import pandas as pd +import pygplates +import requests +from plate_model_manager import PlateModelManager + +# you need pygplates and pandas to run this script +# using gplately env is recommended +# `micromamba activate gplately` + +DATA_CACHE_DIR = "data-cache" +QUERY_DATA_URL = "https://www.gpmdb.net/get_query_data/" +QUERY_DATA_FILENAME = "query-data.json" +PMAG_RESULT_URL = "https://gpmdb.net/get_PMAGRESULT_data/?fmt=json" +PMAG_RESULT_FILENAME = "pmag-result.json" + + +def create_vgp_feature( + RESULTNO, + PLAT, + PLONG, + INC, + DEC, + DP, + DM, + LOMAGAGE, + HIMAGAGE, + LOWAGE, + HIGHAGE, + sample_site_position, + plate_id, +): + """function to create VGP feature""" + # Paper reference or URL/DOI. + PAPER_URL = "https://doi.org/10.1016/j.earscirev.2022.104258" + + # Create feature + vgp_feature = pygplates.Feature(pygplates.FeatureType.gpml_virtual_geomagnetic_pole) + vgp_feature.set_name(str(RESULTNO)) + vgp_feature.set_description(PAPER_URL) + vgp_feature.set( + pygplates.PropertyName.gpml_average_sample_site_position, + pygplates.GmlPoint(sample_site_position), + ) + vgp_feature.set( + pygplates.PropertyName.gpml_pole_position, + pygplates.GmlPoint(pygplates.PointOnSphere(PLAT, PLONG)), + ) + vgp_feature.set( + pygplates.PropertyName.gpml_pole_a95, pygplates.XsDouble(math.sqrt(DP + DM)) + ) + vgp_feature.set(pygplates.PropertyName.gpml_pole_dp, pygplates.XsDouble(DP)) + vgp_feature.set(pygplates.PropertyName.gpml_pole_dm, pygplates.XsDouble(DM)) + vgp_feature.set( + pygplates.PropertyName.gpml_average_inclination, pygplates.XsDouble(INC) + ) + vgp_feature.set( + pygplates.PropertyName.gpml_average_declination, pygplates.XsDouble(DEC) + ) + vgp_feature.set( + pygplates.PropertyName.gpml_average_age, + pygplates.XsDouble((LOMAGAGE + HIMAGAGE) / 2.0), + ) + vgp_feature.set_valid_time(HIGHAGE, LOWAGE) + + vgp_feature.set_reconstruction_plate_id(plate_id) + + return vgp_feature + + +def assign_plate_id(df, static_polygon_file, rotation_file): + """assign plate ids for sites""" + pids = [] + sites = [] + plate_partitioner = pygplates.PlatePartitioner( + static_polygon_file, + rotation_file, + ) + for index, row in df.iterrows(): + # print(index) + sample_site_position = pygplates.PointOnSphere(row["SLAT"], row["SLONG"]) + reconstructed_static_polygon = plate_partitioner.partition_point( + sample_site_position + ) + if reconstructed_static_polygon: + plate_id = ( + reconstructed_static_polygon.get_feature().get_reconstruction_plate_id() + ) + else: + plate_id = 0 + sites.append(sample_site_position) + pids.append(plate_id) + return sites, pids + + +class ArgParser(argparse.ArgumentParser): + def error(self, message): + sys.stderr.write(f"error: {message}\n") + self.print_help() + sys.exit(1) + + +def add_arguments(parser: argparse.ArgumentParser): + """add command line argument parser""" + parser.formatter_class = argparse.RawDescriptionHelpFormatter + parser.description = __description__ + + parser.set_defaults(func=main) + + parser.add_argument( + "-m", "--model", type=str, dest="model_name", default="Muller2022" + ) + parser.add_argument("-o", "--outfile", type=str, dest="outfile") + + +__description__ = """Retrieve paleomagnetic data from https://www.gpmdb.net, create GPlates-compatible VGP features and save the VGP features in a .gpmlz file. + + The two URLs being used are + - https://www.gpmdb.net/get_query_data/ + - https://www.gpmdb.net/get_PMAGRESULT_data/?fmt=json + + This command will create two files. + - the .gpmlz file -- contains the GPlates-compatible VGP features + - the pmag-result.csv -- contains the raw paleomagnetic data + + Usage example: gplately gpmdb -m zahirovic2022 -o test.gpmlz + + The default reconstruction model being used to assign plate IDs is "Muller2022". User can choose to specify the model with "-m/--model". + The avaliable model names can be found with command `pmm ls` (plate-model-manager https://pypi.org/project/plate-model-manager/; use gplately conda env). + + User can specify the output .gpmlz file name with "-o/--outfile". By default, the output file name will be "vgp_features_{model name}.gmplz". + + The `gplately gpmdb` will use model "Muller2022" and create the output file "vgp_features_Muller2022.gmplz". + + The file name for the raw paleomagnetic data is always "pmag-result.csv". + + """ + + +def main(args): + Path(DATA_CACHE_DIR).mkdir(parents=True, exist_ok=True) + + # get query data + if not os.path.isfile(f"{DATA_CACHE_DIR}/{QUERY_DATA_FILENAME}"): + response = requests.get(QUERY_DATA_URL, verify=False) + query_data = response.json() + with open(f"{DATA_CACHE_DIR}/{QUERY_DATA_FILENAME}", "w+") as outfile: + outfile.write(json.dumps(query_data)) + else: + with open(f"{DATA_CACHE_DIR}/{QUERY_DATA_FILENAME}", "r") as infile: + query_data = json.load(infile) + + columns = [] + for c in query_data["columns"]: + if isinstance(c, list): + columns += c + elif isinstance(c, str): + columns.append(c) + else: + raise Exception(f"Invalid comlumn type: {type(c)}") + + df_query = pd.DataFrame(np.array(query_data["data"]), columns=columns) + df_query = df_query.sort_values(by=["RESULTNO"], ignore_index=True) + + # get pmag-result data + if not os.path.isfile(f"{DATA_CACHE_DIR}/{PMAG_RESULT_FILENAME}"): + response = requests.get(PMAG_RESULT_URL, verify=False) + pmagresult_data = response.json() + with open(f"{DATA_CACHE_DIR}/{PMAG_RESULT_FILENAME}", "w+") as outfile: + outfile.write(json.dumps(pmagresult_data)) + else: + with open(f"{DATA_CACHE_DIR}/{PMAG_RESULT_FILENAME}", "r") as infile: + pmagresult_data = json.load(infile) + + columns = [] + for c in pmagresult_data["columns"]: + if isinstance(c, list): + columns += c + elif isinstance(c, str): + columns.append(c) + else: + raise Exception(f"Invalid comlumn type: {type(c)}") + + df_pmagresult = pd.DataFrame(np.array(pmagresult_data["data"]), columns=columns) + df_pmagresult = df_pmagresult.sort_values(by=["RESULTNO"], ignore_index=True) + + df_pmagresult["LOWAGE"] = df_query["LOWAGE"] + df_pmagresult["HIGHAGE"] = df_query["HIGHAGE"] + + df = df_pmagresult[ + [ + "RESULTNO", + "SLAT", + "SLONG", + "PLAT", + "PLONG", + "INC", + "DEC", + "DP", + "DM", + "LOMAGAGE", + "HIMAGAGE", + "LOWAGE", + "HIGHAGE", + ] + ] + + pm_manager = PlateModelManager() + model = pm_manager.get_model(args.model_name, data_dir=".") + + sites, pids = assign_plate_id( + df, + static_polygon_file=model.get_static_polygons(), + rotation_file=model.get_rotation_model(), + ) + + count = 0 + vgp_features = [] + for index, row in df.iterrows(): + # print(index) + if row["DP"] is not None and row["DM"] is not None and row["INC"] is not None: + vgp_feature = create_vgp_feature( + RESULTNO=row["RESULTNO"], + PLAT=row["PLAT"], + PLONG=row["PLONG"], + INC=row["INC"], + DEC=row["DEC"], + DP=row["DP"], + DM=row["DM"], + LOMAGAGE=row["LOMAGAGE"], + HIMAGAGE=row["HIMAGAGE"], + LOWAGE=row["LOWAGE"], + HIGHAGE=row["HIGHAGE"], + sample_site_position=sites[index], + plate_id=pids[index], + ) + + # Add VGP feature to list. + vgp_features.append(vgp_feature) + else: + count += 1 + # print(f"ignore row: {row}") + + print(f"{count} rows have been ignored due to None values.") + + # Save VGP features to file. + if not args.outfile: + outfile_name = f"vgp_features_{args.model_name}.gpmlz" + else: + outfile_name = args.outfile + + pygplates.FeatureCollection(vgp_features).write(outfile_name) + + df_pmagresult = df_pmagresult.drop(["ROCKUNITNO"], axis=1) + df_pmagresult.to_csv("pmag-result.csv", index=False) + + print( + f"The files {outfile_name} and pmag-result.csv have been created successfully." + ) + + +if __name__ == "__main__": + # The command-line parser. + parser = argparse.ArgumentParser( + description=__description__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + # add arguments + add_arguments(parser) + + # Parse command-line options. + args = parser.parse_args() + + # call main function + main(args)