Skip to content

Commit

Permalink
feat: add editable parameters for CMR queries (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
tsutterley authored May 2, 2024
1 parent 55f64e3 commit b653c29
Showing 1 changed file with 78 additions and 1 deletion.
79 changes: 78 additions & 1 deletion grounding_zones/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,67 @@ def pgc_list(
# return the list of column names and last modified times
return (colnames, lastmod, None)

# PURPOSE: filter the CMR json response for desired data files
def cmr_filter_json(
search_results: dict,
endpoint: str = "data",
request_type: str = r"application/x-hdf(eos|5)",
readable_granule_pattern: str = r''
):
"""
Filter the CMR json response for desired data files
Parameters
----------
search_results: dict
json response from CMR query
endpoint: str, default 'data'
url endpoint type
- ``'data'``: NASA Earthdata https archive
- ``'opendap'``: NASA Earthdata OPeNDAP archive
- ``'s3'``: NASA Earthdata Cumulus AWS S3 bucket
request_type: str, default 'application/x-hdfeos'
data type for reducing CMR query
readable_granule_pattern: str, default ''
regular expression pattern for reducing list
Returns
-------
producer_granule_ids: list
ICESat-2 granules
granule_urls: list
ICESat-2 granule urls from NSIDC
"""
# output list of granule ids and urls
producer_granule_ids = []
granule_urls = []
# check that there are urls for request
if ('feed' not in search_results) or ('entry' not in search_results['feed']):
return (producer_granule_ids, granule_urls)
# descriptor links for each endpoint
rel = {}
rel['data'] = "http://esipfed.org/ns/fedsearch/1.1/data#"
rel['opendap'] = "http://esipfed.org/ns/fedsearch/1.1/service#"
rel['s3'] = "http://esipfed.org/ns/fedsearch/1.1/s3#"
# iterate over references and get cmr location
for entry in search_results['feed']['entry']:
producer_granule_ids.append(entry['producer_granule_id'])
for link in entry['links']:
# skip links without descriptors
if ('rel' not in link.keys()):
continue
if ('type' not in link.keys()):
continue
# append if selected endpoint and request type
if (link['rel'] == rel[endpoint]) and \
re.match(request_type, link['type']) and \
re.search(readable_granule_pattern, link['href']):
granule_urls.append(link['href'])
break
# return the list of urls and granule ids
return (producer_granule_ids, granule_urls)

# PURPOSE: cmr queries for orbital parameters
def cmr(
product: str | None = None,
Expand All @@ -234,6 +295,8 @@ def cmr(
end_date: str | None = None,
provider: str = 'NSIDC_ECS',
endpoint: str = 'data',
readable_granule_name: str | list = [],
readable_granule_pattern: str = r'',
request_type: str = r"application/x-hdf(eos|5)",
opener = None,
verbose: bool = False,
Expand Down Expand Up @@ -263,6 +326,10 @@ def cmr(
- ``'data'``: NASA Earthdata https archive
- ``'opendap'``: NASA Earthdata OPeNDAP archive
- ``'s3'``: NASA Earthdata Cumulus AWS S3 bucket
readable_granule_name: str or list, default []
readable granule name(s) to query from CMR
readable_granule_pattern: str, default ''
regular expression pattern for reducing list
request_type: str, default 'application/x-hdfeos'
data type for reducing CMR query
opener: obj or NoneType, default None
Expand Down Expand Up @@ -320,6 +387,15 @@ def cmr(
if bbox is not None:
bounding_box = ','.join([str(b) for b in bbox])
CMR_KEYS.append(f'&bounding_box={bounding_box}')
# verify that readable_granule_name is a list
if isinstance(readable_granule_name, str):
readable_granule_name = [readable_granule_name]
# append keys for querying specific granules
if any(readable_granule_name):
CMR_KEYS.append("&options[readable_granule_name][pattern]=true")
CMR_KEYS.append("&options[spatial][or]=true")
for gran in readable_granule_name:
CMR_KEYS.append(f"&readable_granule_name[]={gran}")
# full CMR query url
cmr_query_url = "".join([posixpath.join(*CMR_HOST),*CMR_KEYS])
logging.info(f'CMR request={cmr_query_url}')
Expand All @@ -339,7 +415,8 @@ def cmr(
# read the CMR search as JSON
search_page = json.loads(response.read().decode('utf-8'))
ids,urls = cmr_filter_json(search_page,
endpoint=endpoint, request_type=request_type)
endpoint=endpoint, request_type=request_type,
readable_granule_pattern=readable_granule_pattern)
if not urls:
break
# extend lists
Expand Down

0 comments on commit b653c29

Please sign in to comment.