Skip to content

Commit

Permalink
Catch HTTP 429 errors from CDSE (#54)
Browse files Browse the repository at this point in the history
* add catch for 429 error from CDSE

also download in parallel

may work to help dbekaert/RAiDER#610

* raise for other http errors

* bump version
  • Loading branch information
scottstanie authored Nov 8, 2023
1 parent 070b4d5 commit c82f52d
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 12 deletions.
8 changes: 8 additions & 0 deletions eof/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@
is_flag=True,
help="save credentials provided interactively in the ~/.netrc file if necessary",
)
@click.option(
"--max-workers",
type=int,
default=3,
help="Number of parallel downloads to run. Note that CDSE has a limit of 4",
)
def cli(
search_path: str,
save_dir: str,
Expand All @@ -107,6 +113,7 @@ def cli(
cdse_password: str = "",
ask_password: bool = False,
update_netrc: bool = False,
max_workers: int = 3,
):
"""Download Sentinel precise orbit files.
Expand Down Expand Up @@ -136,4 +143,5 @@ def cli(
asf_password=asf_password,
cdse_user=cdse_user,
cdse_password=cdse_password,
max_workers=max_workers,
)
44 changes: 36 additions & 8 deletions eof/dataspace_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Client to get orbit files from dataspace.copernicus.eu ."""
from __future__ import annotations

from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
Expand Down Expand Up @@ -164,13 +165,19 @@ def query_orbit_by_dt(
logger.warning("The following dates were not found: %s", remaining_dates)
return all_results

def download_all(self, query_results: list[dict], output_directory: Filename):
def download_all(
self,
query_results: list[dict],
output_directory: Filename,
max_workers: int = 3,
):
"""Download all the specified orbit products."""
return download_all(
query_results,
output_directory=output_directory,
username=self._username,
password=self._password,
max_workers=max_workers,
)


Expand Down Expand Up @@ -362,6 +369,7 @@ def download_orbit_file(
if chunk:
outfile.write(chunk)

logger.info(f"Orbit file downloaded to {output_orbit_file_path}")
return output_orbit_file_path


Expand All @@ -370,6 +378,7 @@ def download_all(
output_directory: Filename,
username: str = "",
password: str = "",
max_workers: int = 3,
) -> list[Path]:
"""Download all the specified orbit products.
Expand All @@ -383,6 +392,9 @@ def download_all(
CDSE username
password : str
CDSE password
max_workers : int, default = 3
Maximum parallel downloads from CDSE.
Note that more than 4 connections will result in an HTTP 429 error
"""
downloaded_paths: list[Path] = []
Expand All @@ -391,22 +403,38 @@ def download_all(
# query_results, start_time, stop_time
# )
# Obtain an access token for the download request from the provided credentials

access_token = get_access_token(username, password)
output_names = []
download_urls = []
for query_result in query_results:
orbit_file_name = query_result["Name"]
orbit_file_request_id = query_result["Id"]

# Construct the URL used to download the Orbit file
download_url = f"{DOWNLOAD_URL}({orbit_file_request_id})/$value"
download_urls.append(download_url)

logger.info(
orbit_file_name = query_result["Name"]
output_names.append(orbit_file_name)

logger.debug(
f"Downloading Orbit file {orbit_file_name} from service endpoint "
f"{download_url}"
)
output_orbit_file_path = download_orbit_file(
download_url, output_directory, orbit_file_name, access_token
)

logger.info(f"Orbit file downloaded to {output_orbit_file_path}")
downloaded_paths.append(output_orbit_file_path)
downloaded_paths = []
with ThreadPoolExecutor(max_workers=max_workers) as exc:
futures = [
exc.submit(
download_orbit_file,
request_url=u,
output_directory=output_directory,
orbit_file_name=n,
access_token=access_token,
)
for (u, n) in zip(download_urls, output_names)
]
for f in futures:
downloaded_paths.append(f.result())

return downloaded_paths
17 changes: 14 additions & 3 deletions eof/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pathlib import Path

from dateutil.parser import parse
from requests.exceptions import HTTPError

from .asf_client import ASFClient
from .dataspace_client import DataspaceClient
Expand Down Expand Up @@ -102,9 +103,19 @@ def download_eofs(

if query:
logger.info("Attempting download from SciHub")
results = client.download_all(query, output_directory=save_dir)
filenames.extend(results)
dataspace_successful = True
try:
results = client.download_all(
query, output_directory=save_dir, max_workers=max_workers
)
filenames.extend(results)
dataspace_successful = True
except HTTPError as e:
assert e.response is not None
if e.response.status_code == 429:
logger.warning(f"Failed due to too many requests: {e.args}")
# Dataspace failed -> try asf
else:
raise

# For failures from scihub, try ASF
if not dataspace_successful:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="sentineleof",
version="0.9.3",
version="0.9.4",
author="Scott Staniewicz",
author_email="scott.stanie@gmail.com",
description="Download precise orbit files for Sentinel 1 products",
Expand Down

0 comments on commit c82f52d

Please sign in to comment.