From d1bafbe4395f946de52bb1f38907b65a8ee96257 Mon Sep 17 00:00:00 2001 From: Scott Collins Date: Fri, 6 Dec 2024 08:56:37 -0800 Subject: [PATCH 1/2] Initial addition of the cmor_data_as_csv_demo.ipynb for SBN users --- notebooks/sbn/cmor_data_as_csv_demo.ipynb | 390 ++++++++++++++++++++++ 1 file changed, 390 insertions(+) create mode 100644 notebooks/sbn/cmor_data_as_csv_demo.ipynb diff --git a/notebooks/sbn/cmor_data_as_csv_demo.ipynb b/notebooks/sbn/cmor_data_as_csv_demo.ipynb new file mode 100644 index 0000000..f026fc9 --- /dev/null +++ b/notebooks/sbn/cmor_data_as_csv_demo.ipynb @@ -0,0 +1,390 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e38f8e6e-9970-49cf-84d5-9c4ec97375c8", + "metadata": {}, + "source": [ + "# Acquiring CMOR Meteoroid Stream Survey Data from the PDS Registry via Peppi" + ] + }, + { + "cell_type": "markdown", + "id": "4ebc7c38-ce25-42a0-86cc-b00a053d9fcc", + "metadata": {}, + "source": [ + "This notebook demonstrates how to download data from the SBN Canadian Meteor Orbit Radar (CMOR) PDS Bundle, and covert the results to both a `pandas` dataframe (for working with in memory), as well as a `.csv` file (for serialization to local disk).\n", + "\n", + "The `peppi` library is used as the primary interface for querying and retrieving data from the PDS Registry API." + ] + }, + { + "cell_type": "markdown", + "id": "316d2544-2560-46e1-aa79-6912aaf3bf4b", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f84531e-1e60-43f0-bd35-f05327678866", + "metadata": {}, + "outputs": [], + "source": [ + "# If the pds.peppi module has not been installed yet, you can run \"pip install pds.peppi\" within\n", + "# your development environment to get the latest version.\n", + "import pds.peppi as peppi\n", + "\n", + "import pdr\n", + "import os\n", + "\n", + "from pprint import pprint\n", + "from os.path import abspath, join\n", + "from urllib.request import urlretrieve" + ] + }, + { + "cell_type": "markdown", + "id": "e0391046-586e-4719-8480-469954776bac", + "metadata": {}, + "source": [ + "## Peppi initialization" + ] + }, + { + "cell_type": "markdown", + "id": "9bb6495e-c899-4b7c-99c8-25f9fa7e80d0", + "metadata": {}, + "source": [ + "To use the `peppi` library to pull data from the PDS Registry API, you must first create the `peppi.PDSRegistryClient` object that defines the endpoint of the PDS Registry API. This client object can then be used to create an instance of the `peppi.Products` class, where queries can be defined and results retrieved iteratively.\n", + "\n", + "Typically, at minimum you will need to know the LIDVID for the Bundle or Collection containing the desired data. For this demo, we will start with the CMOR Bundle LIDVID (`urn:nasa:pds:gbo.meteoroid.cmor.radar-survey::1.0`), and use it to drill into an associated Collection, then into the data itself.\n", + "\n", + "`peppi.Products` objects provide a `get` function, which configures a query for all products that match the provided LIDVID." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b582731f-f208-44dc-98e6-1aca0542fa34", + "metadata": {}, + "outputs": [], + "source": [ + "cmor_bundle_lidvid = \"urn:nasa:pds:gbo.meteoroid.cmor.radar-survey::1.0\"\n", + "\n", + "client = peppi.PDSRegistryClient()\n", + "products = peppi.Products(client).get(cmor_bundle_lidvid)" + ] + }, + { + "cell_type": "markdown", + "id": "aac89dd3-4406-456e-8ea5-4eac047db6da", + "metadata": {}, + "source": [ + "## Retrieving Query Results" + ] + }, + { + "cell_type": "markdown", + "id": "8b1f8de1-0807-4325-86af-6363e20c9ece", + "metadata": {}, + "source": [ + "`peppi` uses \"lazy-evaluation\" of queries defined on `Products` instances, meaning that no query is performed until one iterates over the `Products` instance itself to retrieve results. Below we can see how to gather results of our `get` query into a seperate list of products matching our desired LIDVID." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d923140-f5ad-44a8-80f7-9bf3eb3732e1", + "metadata": {}, + "outputs": [], + "source": [ + "matching_products = [product for product in products]" + ] + }, + { + "cell_type": "markdown", + "id": "9ccd1edb-af98-4348-a279-01336a46e467", + "metadata": {}, + "source": [ + "We can now examine specific properties of the matching Bundles to determine the LIDVID(s) of a Collection associated to this Bundle." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93739253-dd09-491e-9a29-9167802aaa0a", + "metadata": {}, + "outputs": [], + "source": [ + "for matching_product in matching_products:\n", + " print(matching_product.properties['lidvid'], \n", + " matching_product.properties['ops:Label_File_Info.ops:file_ref'],\n", + " matching_product.properties['pds:Bundle_Member_Entry.pds:lidvid_reference'])\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "f8810f24-19c6-45bd-8a3a-4cfc284eac79", + "metadata": {}, + "source": [ + "There should be a matching product for the Bundle hosted at `sbnarchive.psi.edu`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05812654-02c4-469a-b497-174c4c70bccb", + "metadata": {}, + "outputs": [], + "source": [ + "cmor_bundle = matching_products[0]" + ] + }, + { + "cell_type": "markdown", + "id": "fb7ec948-95df-4d47-866b-e328bcdfd68d", + "metadata": {}, + "source": [ + "Next, we query for one of the Collections in the Bundle, using the associated Bundle Member LIDVID Reference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc67a1f5-eca6-4c6b-9d70-4b175fa1fcd7", + "metadata": {}, + "outputs": [], + "source": [ + "cmor_collection_lidvid = cmor_bundle.properties['pds:Bundle_Member_Entry.pds:lidvid_reference'][0]\n", + "products = peppi.Products(client).get(cmor_collection_lidvid)" + ] + }, + { + "cell_type": "markdown", + "id": "e51948c0-ca3a-4d8e-a1e1-60e0508f93a9", + "metadata": {}, + "source": [ + "Again, to actualy perform the query we iterate over the `peppi.Products` instance and gather the results into a separate list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a575aab0-0497-4b48-bfe5-f03b924838f9", + "metadata": {}, + "outputs": [], + "source": [ + "matching_products = [product for product in products]\n", + "\n", + "for matching_product in matching_products:\n", + " print(matching_product.properties['lidvid'], \n", + " matching_product.properties['ops:Label_File_Info.ops:file_ref'], \n", + " matching_product.properties['ops:Data_File_Info.ops:file_ref'])\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "e3df1e4c-b2cc-4151-882c-d69551a5ff94", + "metadata": {}, + "source": [ + "We should see results for the query hosted at hosted at `sbnarchive.psi.edu`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8afd5c1d-ef0a-49ba-bdee-c699c8014b92", + "metadata": {}, + "outputs": [], + "source": [ + "cmor_collection = matching_products[0]" + ] + }, + { + "cell_type": "markdown", + "id": "552b517f-007a-4ab6-a629-5a79ff388a78", + "metadata": {}, + "source": [ + "## Downloading Data From a Collection" + ] + }, + { + "cell_type": "markdown", + "id": "2326dc4a-2f12-4b33-8ee5-27e86a436252", + "metadata": {}, + "source": [ + "The `collection_gbo.meteoroid.cmor.radar-survey_data_inventory.csv` file referenced by the `ops:Data_File_Info.ops:file_ref` property contains a listing of LIDVIDs pointing to the individual data products that comprise this Collection. We now download both the PDS Label of the Collection, as well as the `.csv` data product, then use the `pdr` library to read the contents of the `.csv` file into a `pandas.DataFrame`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cc74e6f-5856-41a2-afcf-9aa887a3fe4d", + "metadata": {}, + "outputs": [], + "source": [ + "# Download the PDS4 label to current user's home directory\n", + "remote_label_path = cmor_collection.properties['ops:Label_File_Info.ops:file_ref'][0]\n", + "local_label_path = join(os.environ['HOME'], remote_label_path.split('/')[-1])\n", + "\n", + "print(f\"Downloading {remote_label_path} to {abspath(local_label_path)}\")\n", + "result = urlretrieve(remote_label_path, local_label_path)\n", + "\n", + "# Download the .csv product to current user's home directory\n", + "remote_data_path = cmor_collection.properties['ops:Data_File_Info.ops:file_ref'][0]\n", + "local_data_path = join(os.environ['HOME'], remote_data_path.split('/')[-1])\n", + "\n", + "print(f\"Downloading {remote_data_path} to {abspath(local_data_path)}\")\n", + "result = urlretrieve(remote_data_path, local_data_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4153621a-7aa7-4222-8e93-d8e3361911ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Use Planetary Data Reader to read the data into a pandas DataFrame\n", + "data = pdr.read(abspath(local_label_path))\n", + "print(data.keys())\n", + "print(data['TABLE_0'].keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e01875d7-632b-49c4-b9f9-69e1bbb62e1d", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract LIDVIDs of Data products from DataFrame\n", + "cmor_data_lidvids = data['TABLE_0']['LIDVID_LID']\n", + "print(list(cmor_data_lidvids))" + ] + }, + { + "cell_type": "markdown", + "id": "efa93fc3-bbc4-4e71-8a5c-0b778679a699", + "metadata": {}, + "source": [ + "Finally, we use one of these Data LIDVIDs (`urn:nasa:pds:gbo.meteoroid.cmor.radar-survey:data:complexes_tab::1.0`) to repeat the process of query and download for both the PDS4 Label and associated Data product." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "840d2440-64ea-4305-b723-5b7e44e68766", + "metadata": {}, + "outputs": [], + "source": [ + "cmor_data_lidvid = cmor_data_lidvids[0]\n", + "products = peppi.Products(client).get(cmor_data_lidvid)\n", + "\n", + "matching_products = [p for p in products]\n", + "\n", + "for matching_product in matching_products:\n", + " print(matching_product.properties['lidvid'], \n", + " matching_product.properties['ops:Label_File_Info.ops:file_ref'], \n", + " matching_product.properties['ops:Data_File_Info.ops:file_ref'])\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80ccd832-630d-4671-9eb2-4cac596efe7c", + "metadata": {}, + "outputs": [], + "source": [ + "cmor_complexes = matching_products[0]\n", + "\n", + "# Download the PDS4 label to current user's home directory\n", + "remote_label_path = cmor_complexes.properties['ops:Label_File_Info.ops:file_ref'][0]\n", + "local_label_path = join(os.environ['HOME'], remote_label_path.split('/')[-1])\n", + "\n", + "print(f\"Downloading {remote_label_path} to {abspath(local_label_path)}\")\n", + "result = urlretrieve(remote_label_path, local_label_path)\n", + "\n", + "# Download the complexes.tab product to current user's home directory\n", + "remote_data_path = cmor_complexes.properties['ops:Data_File_Info.ops:file_ref'][0]\n", + "local_data_path = join(os.environ['HOME'], remote_data_path.split('/')[-1])\n", + "\n", + "print(f\"Downloading {remote_data_path} to {abspath(local_data_path)}\")\n", + "result = urlretrieve(remote_data_path, local_data_path)" + ] + }, + { + "cell_type": "markdown", + "id": "18e7ae5a-c70c-4610-ab5a-07550fda308a", + "metadata": {}, + "source": [ + "The Planetary Data Reader library can again be used to pull the label and data information into a `DataFrame`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79f1fe47-3dd7-4a8d-96f6-64c3d09cc916", + "metadata": {}, + "outputs": [], + "source": [ + "# use Planetary Data Reader to read the data into a pandas DataFrame\n", + "data = pdr.read(abspath(local_label_path))\n", + "print(data.keys())\n", + "data['table']" + ] + }, + { + "cell_type": "markdown", + "id": "e00c9e35-08ef-4050-b835-583116ba07f8", + "metadata": {}, + "source": [ + "From here, we can leverage `pandas` to commit the Data to disk in `.csv` format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "957ad980-3d60-4474-858e-f11c80dfeafe", + "metadata": {}, + "outputs": [], + "source": [ + "print(f'Outputting CSV {local_data_path}.csv')\n", + "data['table'].to_csv(f'{local_data_path}.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "42f916b1-d03d-4bc1-a4b0-d7656198b903", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 8aa3e5e308ec3a1323285b288626c5b60c2b8367 Mon Sep 17 00:00:00 2001 From: Sean Kelly Date: Fri, 6 Dec 2024 11:18:56 -0600 Subject: [PATCH 2/2] Adjust requirements so it can pip install --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 37e725b..cabb2c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,6 @@ leafmap==0.23.3 jupyterlab==3.6.5 matplotlib==3.3.4 pds4-tools==1.3 -pds.api-client==1.3.0 tqdm==4.59.0 spiceypy==5.1.2 requests==2.32.2 @@ -16,4 +15,4 @@ pds4-tools==1.3 pywwt==0.20.0 wwt-kernel-data-relay==0.2.0 pdr~=1.3.0 -pds.peppi +pds.peppi~=0.4.0