diff --git a/eof/cli.py b/eof/cli.py index 1835387..396b829 100644 --- a/eof/cli.py +++ b/eof/cli.py @@ -36,12 +36,13 @@ help="Optionally specify Sentinel satellite to download (default: gets both S1A and S1B)", ) @click.option( - "--use-scihub/--no-use-scihub", - default=True, - help="Use SciHub as primary provider to download orbits", - show_default=True, + "--orbit-type", + type=click.Choice(["precise", "restituted"]), + default="precise", + help="Optionally specify the type of orbit file to get " + "(default: precise (POEORB), but fallback to restituted (RESORB))", ) -def cli(search_path, save_dir, sentinel_file, date, mission, use_scihub): +def cli(search_path, save_dir, sentinel_file, date, mission, orbit_type): """Download Sentinel precise orbit files. Saves files to `save-dir` (default = current directory) @@ -57,5 +58,5 @@ def cli(search_path, save_dir, sentinel_file, date, mission, use_scihub): sentinel_file=sentinel_file, mission=mission, date=date, - use_scihub=use_scihub, + orbit_type=orbit_type, ) diff --git a/eof/download.py b/eof/download.py index b7a124e..e95acbd 100644 --- a/eof/download.py +++ b/eof/download.py @@ -27,26 +27,16 @@ import itertools import requests from multiprocessing.pool import ThreadPool -from datetime import timedelta from dateutil.parser import parse -from .parsing import EOFLinkFinder +from .scihubclient import ASFClient, ScihubGnssClient from .products import Sentinel, SentinelOrbit from .log import logger -MAX_WORKERS_STEP = 6 # step.esa.int servers have stricter requirements - -# mirror server maintained by STEP team -# This page has links with relative urls in the tags, such as: -# S1A_OPER_AUX_POEORB_OPOD_20210318T121438_V20210225T225942_20210227T005942.EOF.zip -STEP_URL = "http://step.esa.int/auxdata/orbits/Sentinel-1/{orbit_type}/{mission}/{dt}/" -DT_FMT = "%Y/%m" - -PRECISE_ORBIT = "POEORB" -RESTITUTED_ORBIT = "RESORB" +MAX_WORKERS = 6 # workers to download in parallel (for ASF backup) def 
download_eofs(orbit_dts=None, missions=None, sentinel_file=None, save_dir=".", - use_scihub: bool = True): + orbit_type="precise"): """Downloads and saves EOF files for specific dates Args: @@ -55,8 +45,7 @@ def download_eofs(orbit_dts=None, missions=None, sentinel_file=None, save_dir=". No input downloads both, must be same len as orbit_dts sentinel_file (str): path to Sentinel-1 filename to download one .EOF for save_dir (str): directory to save the EOF files into - use_scihub (bool): use SciHub to download orbits - (if False, SciHUb is used only as a fallback) + orbit_type (str): precise or restituted Returns: list[str]: all filenames of saved orbit files @@ -80,206 +69,73 @@ def download_eofs(orbit_dts=None, missions=None, sentinel_file=None, save_dir=". orbit_dts = [parse(dt) if isinstance(dt, str) else dt for dt in orbit_dts] filenames = [] - remaining_dates = [] + scihub_successful = False + client = ScihubGnssClient() - if use_scihub: + # First, check that Scihub isn't having issues + if client.server_is_up(): # try to search on scihub - from .scihubclient import ScihubGnssClient - client = ScihubGnssClient() - query = {} if sentinel_file: - query.update(client.query_orbit_for_product(sentinel_file)) + query = client.query_orbit_for_product(sentinel_file, orbit_type=orbit_type) else: - for mission, dt in zip(missions, orbit_dts): - found_result = False - products = client.query_orbit(dt - ScihubGnssClient.T0, - dt + ScihubGnssClient.T1, - mission, - product_type='AUX_POEORB') - result = (client._select_orbit(products, dt, dt + timedelta(minutes=1)) - if products else None) - if result: - found_result = True - query.update(result) - else: - # try with RESORB - products = client.query_orbit(dt - timedelta(hours=2), - dt + timedelta(hours=2), - mission, - product_type='AUX_RESORB') - result = (client._select_orbit(products, dt, dt + timedelta(minutes=1)) - if products else None) - if result: - found_result = True - query.update(result) - - if not found_result: 
- remaining_dates.append((mission, dt)) + query = client.query_orbit_by_dt(orbit_dts, missions, orbit_type=orbit_type) if query: result = client.download_all(query, directory_path=save_dir) filenames.extend( item['path'] for item in result.downloaded.values() ) - else: - # If forcing avoidance of scihub, all downloads remain - remaining_dates = zip(missions, orbit_dts) + scihub_successful = True - # For failures from scihub, try step.esa.int - if remaining_dates: + # For failures from scihub, try ASF + if not scihub_successful: + logger.warning("Scihub failed, trying ASF") + asfclient = ASFClient() + urls = asfclient.get_download_urls(orbit_dts, missions, orbit_type=orbit_type) # Download and save all links in parallel - pool = ThreadPool(processes=MAX_WORKERS_STEP) - result_dt_dict = { - pool.apply_async(_download_and_write, (mission, dt, save_dir)): dt - for mission, dt in remaining_dates + pool = ThreadPool(processes=MAX_WORKERS) + result_url_dict = { + pool.apply_async(_download_and_write, (url,)): url + for url in urls } - for result, dt in result_dt_dict.items(): + for result, url in result_url_dict.items(): cur_filenames = result.get() if cur_filenames is None: - logger.error("Failed to download orbit for %s", dt.date()) + logger.error("Failed to download orbit for %s", url) else: - logger.info("Finished %s, saved to %s", dt.date(), cur_filenames) - filenames.extend(cur_filenames) + logger.info("Finished %s, saved to %s", url, cur_filenames) + filenames.append(cur_filenames) return filenames -def eof_list(start_dt, mission, orbit_type=PRECISE_ORBIT): - """Download the list of .EOF files for a specific date - - Args: - start_dt (str or datetime): Year month day of validity start for orbit file - - Returns: - list: urls of EOF files - - Raises: - ValueError: if start_dt returns no results - - Usage: - >>> from datetime import datetime - >>> eof_list(datetime(2021, 3, 4), "S1A") - (['http://step.esa.int/auxdata/orbits/Sentinel-1/POEORB/S1A/2021/03/\ 
-S1A_OPER_AUX_POEORB_OPOD_20210325T121917_V20210304T225942_20210306T005942.EOF.zip'], 'POEORB') - """ - # The step.esa.int/auxdata page stotes all files for one month, but they start, e.g.: - # ...V20190501T225942_... - # with validity time at 22:59 on the 1st of the month - # If the desired data is on day 1 of a month, but starts before 22:59, - # need to search the previous month's page - if start_dt.day == 1 and start_dt.hour < 23: - search_dt = start_dt - timedelta(days=1) - else: - search_dt = start_dt - - url = STEP_URL.format( - orbit_type=orbit_type, mission=mission, dt=search_dt.strftime(DT_FMT) - ) - - logger.info("Searching for EOFs at {}".format(url)) - response = requests.get(url) - if response.status_code == 404: - if orbit_type == PRECISE_ORBIT: - logger.warning( - "Precise orbits not avilable yet for {}, trying RESORB".format( - search_dt - ) - ) - return eof_list(start_dt, mission, orbit_type=RESTITUTED_ORBIT) - else: - raise ValueError("Orbits not avilable yet for {}".format(search_dt)) - # Check for any other problem - response.raise_for_status() - - parser = EOFLinkFinder() - parser.feed(response.text) - # Append the test url, since the links on the page are relative (don't contain full url) - # Now the URL separates S1A and S1B, so no need for this - # links = [url + link for link in parser.eof_links if link.startswith(mission)] - links = [url + link for link in parser.eof_links] - - if len(links) < 1: - if orbit_type == PRECISE_ORBIT: - logger.warning( - "No precise orbit files found for {} on {}, searching RESORB".format( - mission, start_dt.strftime(DT_FMT) - ) - ) - return eof_list(start_dt, mission, orbit_type=RESTITUTED_ORBIT) - - raise ValueError( - "No EOF files found for {} on {} at {}".format( - start_dt.strftime(DT_FMT), mission, url - ) - ) - return links, orbit_type - - -def _dedupe_links(links): - out = [links[0]] - orb1 = SentinelOrbit(links[0].split("/")[-1]) - for link in links[1:]: - if SentinelOrbit(link.split("/")[-1]).date 
!= orb1.date: - out.append(link) - return out - - -def _pick_precise_file(links, sent_date): - """Choose the precise file with (sent_date - 1, sent_date + 1)""" - out = [] - for link in links: - so = SentinelOrbit(link.split("/")[-1]) - # hotfix until I figure out what the RAW processor is doing with the orbtimings - if (so.start_time.date() == (sent_date - timedelta(days=1)).date()) and ( - so.stop_time.date() == (sent_date + timedelta(days=1)).date() - ): - out.append(link) - return out - - -def _download_and_write(mission, dt, save_dir="."): +def _download_and_write(url, save_dir="."): """Wrapper function to run the link downloading in parallel Args: - mission (str): Sentinel mission: either S1A or S1B - dt (datetime): datetime of Sentinel product + url (str): url of orbit file to download save_dir (str): directory to save the EOF files into Returns: list[str]: Filenames to which the orbit files have been saved """ - try: - cur_links, orbit_type = eof_list(dt, mission) - except ValueError as e: # 0 found for date - logger.warning(e.args[0]) - logger.warning("Skipping {}".format(dt.strftime("%Y-%m-%d"))) - return - - cur_links = _dedupe_links(cur_links) - if orbit_type == PRECISE_ORBIT: - cur_links = _pick_precise_file(cur_links, dt) - - # RESORB has multiple overlapping - saved_files = [] - for link in cur_links: - fname = os.path.join(save_dir, link.split("/")[-1]) - if os.path.isfile(fname): - logger.info("%s already exists, skipping download.", link) - return [fname] - - logger.info("Downloading %s", link) - response = requests.get(link) - response.raise_for_status() - logger.info("Saving to %s", fname) - with open(fname, "wb") as f: - f.write(response.content) - if fname.endswith(".zip"): - _extract_zip(fname, save_dir=save_dir) - # Pass the unzipped file ending in ".EOF", not the ".zip" - fname = fname.replace(".zip", "") - saved_files.append(fname) - return saved_files + fname = os.path.join(save_dir, url.split("/")[-1]) + if os.path.isfile(fname): + 
logger.info("%s already exists, skipping download.", url) + return [fname] + + logger.info("Downloading %s", url) + response = requests.get(url) + response.raise_for_status() + logger.info("Saving to %s", fname) + with open(fname, "wb") as f: + f.write(response.content) + if fname.endswith(".zip"): + _extract_zip(fname, save_dir=save_dir) + # Pass the unzipped file ending in ".EOF", not the ".zip" + fname = fname.replace(".zip", "") + return fname def _extract_zip(fname_zipped, save_dir=None, delete=True): @@ -353,8 +209,7 @@ def find_scenes_to_download(search_path="./", save_dir="./"): return orbit_dts, missions -def main(search_path=".", save_dir=",", sentinel_file=None, mission=None, date=None, - use_scihub: bool = True): +def main(search_path=".", save_dir=",", sentinel_file=None, mission=None, date=None, orbit_type="precise"): """Function used for entry point to download eofs""" if not os.path.exists(save_dir): @@ -386,5 +241,5 @@ def main(search_path=".", save_dir=",", sentinel_file=None, mission=None, date=N missions=missions, sentinel_file=sentinel_file, save_dir=save_dir, - use_scihub=use_scihub, + orbit_type=orbit_type, ) diff --git a/eof/scihubclient.py b/eof/scihubclient.py index 777f859..a0e92b7 100644 --- a/eof/scihubclient.py +++ b/eof/scihubclient.py @@ -1,39 +1,38 @@ """sentinelsat based client to get orbit files form scihub.copernicu.eu.""" +import os import logging +import requests import datetime import operator -import collections from typing import Sequence from .products import SentinelOrbit, Sentinel as S1Product from sentinelsat import SentinelAPI +from sentinelsat.exceptions import ServerError - -_log = logging.getLogger(__name__) +# logger = logging.getLogger(__name__) +from .log import logger class ValidityError(ValueError): pass -def get_validity_info(products: Sequence[str]) -> Sequence[SentinelOrbit]: - return [SentinelOrbit(product_id) for product_id in products] - - -def lastval_cover(t0: datetime.datetime, t1: datetime.datetime, - 
data: Sequence[SentinelOrbit]) -> str: +def lastval_cover( + t0: datetime.datetime, t1: datetime.datetime, data: Sequence[SentinelOrbit] +) -> str: candidates = [ - item for item in data - if item.start_time <= t0 and item.stop_time >= t1 + item for item in data if item.start_time <= t0 and item.stop_time >= t1 ] if not candidates: raise ValidityError( - f'none of the input products completely covers the requested ' - f'time interval: [t0={t0}, t1={t1}]') + f"none of the input products completely covers the requested " + f"time interval: [t0={t0}, t1={t1}]" + ) - candidates.sort(key=operator.attrgetter('created_time'), reverse=True) + candidates.sort(key=operator.attrgetter("created_time"), reverse=True) return candidates[0].filename @@ -46,50 +45,128 @@ class ScihubGnssClient: T0 = datetime.timedelta(days=1) T1 = datetime.timedelta(days=1) - def __init__(self, user: str = "gnssguest", password: str = "gnssguest", - api_url: str = "https://scihub.copernicus.eu/gnss/", - **kwargs): - self._api = SentinelAPI(user=user, password=password, api_url=api_url, - **kwargs) + def __init__( + self, + user: str = "gnssguest", + password: str = "gnssguest", + api_url: str = "https://scihub.copernicus.eu/gnss/", + **kwargs, + ): + self._api = SentinelAPI(user=user, password=password, api_url=api_url, **kwargs) - def query_orbit(self, t0, t1, satellite_id: str, - product_type: str = 'AUX_POEORB'): - assert satellite_id in {'S1A', 'S1B'} - assert product_type in {'AUX_POEORB', 'AUX_RESORB'} + def query_orbit(self, t0, t1, satellite_id: str, product_type: str = "AUX_POEORB"): + assert satellite_id in {"S1A", "S1B"} + assert product_type in {"AUX_POEORB", "AUX_RESORB"} query_params = dict( producttype=product_type, platformserialidentifier=satellite_id[1:], - date=[t0, t1], + # this has weird endpoint inclusion + # https://github.com/sentinelsat/sentinelsat/issues/551#issuecomment-992344180 + # date=[t0, t1], + # use the following instead + beginposition=(None, t1), + 
endposition=(t0, None), ) - _log.debug('query parameter: %s', query_params) + logger.debug("query parameter: %s", query_params) products = self._api.query(**query_params) return products @staticmethod def _select_orbit(products, t0, t1): - orbit_products = [p['identifier'] for p in products.values()] - validity_info = get_validity_info(orbit_products) + if not products: + return {} + orbit_products = [p["identifier"] for p in products.values()] + validity_info = [SentinelOrbit(product_id) for product_id in orbit_products] product_id = lastval_cover(t0, t1, validity_info) - return collections.OrderedDict( - (k, v) for k, v in products.items() - if v['identifier'] == product_id - ) + return {k: v for k, v in products.items() if v["identifier"] == product_id} - def query_orbit_for_product(self, product, - product_type: str = 'AUX_POEORB', - t0_margin: datetime.timedelta = T0, - t1_margin: datetime.timedelta = T1): + def query_orbit_for_product( + self, + product, + orbit_type: str = "precise", + t0_margin: datetime.timedelta = T0, + t1_margin: datetime.timedelta = T1, + ): if isinstance(product, str): product = S1Product(product) - t0 = product.start_time - t1 = product.stop_time + return self.query_orbit_by_dt( + [product.start_time], + [product.mission], + orbit_type=orbit_type, + t0_margin=t0_margin, + t1_margin=t1_margin, + ) + + def query_orbit_by_dt( + self, + orbit_dts, + missions, + orbit_type: str = "precise", + t0_margin: datetime.timedelta = T0, + t1_margin: datetime.timedelta = T1, + ): + """Query the Scihub api for product info for the specified missions/orbit_dts. - products = self.query_orbit(t0 - t0_margin, t1 + t1_margin, - satellite_id=product.mission, - product_type=product_type) - return self._select_orbit(products, t0, t1) + Args: + orbit_dts (list[datetime.datetime]): list of orbit datetimes + missions (list[str]): list of mission names + orbit_type (str, optional): Type of orbit to prefer in search. Defaults to "precise". 
+ t0_margin (datetime.timedelta, optional): Margin used in searching for early bound + for orbit. Defaults to 1 day. + t1_margin (datetime.timedelta, optional): Margin used in searching for late bound + for orbit. Defaults to 1 day. + + Returns: + query (dict): API info from scihub with the requested products + """ + remaining_dates = [] + query = {} + for dt, mission in zip(orbit_dts, missions): + found_result = False + # Only check for precise orbits if that is what we want + if orbit_type == "precise": + products = self.query_orbit( + dt - t0_margin, + dt + t1_margin, + mission, + product_type="AUX_POEORB", + ) + result = ( + self._select_orbit(products, dt, dt + datetime.timedelta(minutes=1)) + if products + else None + ) + else: + result = None + + if result: + found_result = True + query.update(result) + else: + # try with RESORB + products = self.query_orbit( + dt - datetime.timedelta(hours=1), + dt + datetime.timedelta(hours=1), + mission, + product_type="AUX_RESORB", + ) + result = ( + self._select_orbit(products, dt, dt + datetime.timedelta(minutes=1)) + if products + else None + ) + if result: + found_result = True + query.update(result) + + if not found_result: + remaining_dates.append((mission, dt)) + + if remaining_dates: + logger.warning("The following dates were not found: %s", remaining_dates) + return query def download(self, uuid, **kwargs): """Download a single orbit product. @@ -106,3 +183,129 @@ def download_all(self, products, **kwargs): of arguments.
""" return self._api.download_all(products, **kwargs) + + def server_is_up(self): + """Ping the ESA server using sentinelsat to verify the connection.""" + try: + self._api.query(producttype="AUX_POEORB", platformserialidentifier="S1A") + return True + except ServerError as e: + logger.warning("Cannot connect to the server: %s", e) + return False + + +class ASFClient: + precise_url = "https://s1qc.asf.alaska.edu/aux_poeorb/" + res_url = "https://s1qc.asf.alaska.edu/aux_resorb/" + urls = {"precise": precise_url, "restituted": res_url} + eof_lists = {"precise": None, "restituted": None} + + def get_full_eof_list(self, orbit_type="precise", max_dt=None): + """Get the list of orbit files from the ASF server.""" + from .parsing import EOFLinkFinder + + if orbit_type not in self.urls.keys(): + raise ValueError(f"Unknown orbit type: {orbit_type}") + + if self.eof_lists.get(orbit_type) is not None: + return self.eof_lists[orbit_type] + # Try to see if we have the list of EOFs in the cache + elif os.path.exists(self._get_filename_cache_path(orbit_type)): + eof_list = self._get_cached_filenames(orbit_type) + # Need to clear it if it's older than what we're looking for + max_saved = max([e.start_time for e in eof_list]) + if max_saved < max_dt: + logger.warning(f"Clearing cached {orbit_type} EOF list:") + logger.warning(f"{max_saved} is older than requested {max_dt}") + self._clear_cache(orbit_type) + else: + logger.info("Using cached EOF list") + self.eof_lists[orbit_type] = eof_list + return eof_list + + logger.info("Downloading all filenames from ASF (may take awhile)") + resp = requests.get(self.urls.get(orbit_type)) + finder = EOFLinkFinder() + finder.feed(resp.text) + eof_list = [SentinelOrbit(f) for f in finder.eof_links] + self.eof_lists[orbit_type] = eof_list + self._write_cached_filenames(orbit_type, eof_list) + return eof_list + + def get_download_urls(self, orbit_dts, missions, orbit_type="precise"): + """Find the URL for an orbit file covering the specified 
datetime + + Args: + dt (datetime): requested + Args: + orbit_dts (list[str] or list[datetime.datetime]): datetime for orbit coverage + missions (list[str]): specify S1A or S1B + + Returns: + list[str]: URLs for the orbit files + """ + eof_list = self.get_full_eof_list(orbit_type=orbit_type, max_dt=max(orbit_dts)) + # Split up for quicker parsing of the latest one + mission_to_eof_list = { + "S1A": [eof for eof in eof_list if eof.mission == "S1A"], + "S1B": [eof for eof in eof_list if eof.mission == "S1B"], + } + remaining_orbits = [] + urls = [] + for dt, mission in zip(orbit_dts, missions): + try: + filename = lastval_cover(dt, dt, mission_to_eof_list[mission]) + urls.append(self.urls[orbit_type] + filename) + except ValidityError: + remaining_orbits.append((dt, mission)) + + if remaining_orbits: + logger.warning("The following dates were not found: %s", remaining_orbits) + if orbit_type == "precise": + logger.warning( + "Attempting to download the restituted orbits for these dates." + ) + remaining_dts, remaining_missions = zip(*remaining_orbits) + urls.extend( + self.get_download_urls( + remaining_dts, remaining_missions, orbit_type="restituted" + ) + ) + + return urls + + def _get_cached_filenames(self, orbit_type="precise"): + """Read the cached list of ASF orbit files (None if no cache file exists).""" + filepath = self._get_filename_cache_path(orbit_type) + if os.path.exists(filepath): + with open(filepath, "r") as f: + return [SentinelOrbit(f) for f in f.read().splitlines()] + return None + + def _write_cached_filenames(self, orbit_type="precise", eof_list=[]): + """Cache the ASF orbit files.""" + filepath = self._get_filename_cache_path(orbit_type) + with open(filepath, "w") as f: + for e in eof_list: + f.write(e.filename + "\n") + + def _clear_cache(self, orbit_type="precise"): + """Clear the cache for the ASF orbit files.""" + filepath = self._get_filename_cache_path(orbit_type) + os.remove(filepath) + + @staticmethod + def _get_filename_cache_path(orbit_type="precise"): + fname =
f"{orbit_type.lower()}_filenames.txt" + return os.path.join(ASFClient.get_cache_dir(), fname) + + @staticmethod + def get_cache_dir(): + """Find location of directory to store the cached orbit filename lists + Assuming linux, uses ~/.cache/sentineleof/ (or $XDG_CACHE_HOME/sentineleof/ if set) + """ + path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache")) + path = os.path.join(path, "sentineleof") # Make subfolder for our downloads + if not os.path.exists(path): + os.makedirs(path) + return path diff --git a/setup.py b/setup.py index 22347fd..0aae8f9 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="sentineleof", - version="0.5.7", + version="0.6.0", author="Scott Staniewicz", author_email="scott.stanie@utexas.com", description="Download precise orbit files for Sentinel 1 products",