Skip to content

Commit

Permalink
Merge pull request #351 from jrbourbeau/raise-more-in-store
Browse files: browse the repository at this point in the history
Raise exceptions more
  • Loading branch information
betolink authored Nov 22, 2023
2 parents a0a58aa + 95be04c commit f01b311
Showing 1 changed file with 31 additions and 45 deletions.
76 changes: 31 additions & 45 deletions earthaccess/store.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -230,10 +230,9 @@ def get_s3fs_session(
self.initial_ts = datetime.datetime.now()
return deepcopy(self.s3_fs)
else:
print(
raise ValueError(
"A valid Earthdata login instance is required to retrieve S3 credentials"
)
return None

@lru_cache
def get_fsspec_session(self) -> fsspec.AbstractFileSystem:
Expand Down Expand Up @@ -269,7 +268,7 @@ def open(
self,
granules: Union[List[str], List[DataGranule]],
provider: Optional[str] = None,
) -> Union[List[Any], None]:
) -> List[Any]:
"""Returns a list of fsspec file-like objects that can be used to access files
hosted on S3 or HTTPS by third party libraries like xarray.
Expand All @@ -280,15 +279,14 @@ def open(
"""
if len(granules):
return self._open(granules, provider)
print("The granules list is empty, moving on...")
return None
return []

@singledispatchmethod
def _open(
self,
granules: Union[List[str], List[DataGranule]],
provider: Optional[str] = None,
) -> Union[List[Any], None]:
) -> List[Any]:
"""Returns a list of fsspec file-like objects that can be used to access files
hosted on S3 or HTTPS by third party libraries like xarray.
Expand All @@ -305,17 +303,16 @@ def _open_granules(
granules: List[DataGranule],
provider: Optional[str] = None,
threads: Optional[int] = 8,
) -> Union[List[Any], None]:
) -> List[Any]:
fileset: List = []
data_links: List = []
total_size = round(sum([granule.size() for granule in granules]) / 1024, 2)
print(f" Opening {len(granules)} granules, approx size: {total_size} GB")
print(f"Opening {len(granules)} granules, approx size: {total_size} GB")

if self.auth is None:
print(
raise ValueError(
"A valid Earthdata login instance is required to retrieve credentials"
)
return None

if self.running_in_aws:
if granules[0].cloud_hosted:
Expand Down Expand Up @@ -347,13 +344,12 @@ def _open_granules(
fs=s3_fs,
threads=threads,
)
except Exception:
print(
"An exception occurred while trying to access remote files on S3: "
"This may be caused by trying to access the data outside the us-west-2 region"
except Exception as e:
raise RuntimeError(
"An exception occurred while trying to access remote files on S3. "
"This may be caused by trying to access the data outside the us-west-2 region."
f"Exception: {traceback.format_exc()}"
)
return None
) from e
else:
fileset = self._open_urls_https(data_links, granules, threads=threads)
return fileset
Expand All @@ -373,7 +369,7 @@ def _open_urls(
granules: List[str],
provider: Optional[str] = None,
threads: Optional[int] = 8,
) -> Union[List[Any], None]:
) -> List[Any]:
fileset: List = []
data_links: List = []

Expand All @@ -384,15 +380,13 @@ def _open_urls(
provider = provider
data_links = granules
else:
print(
raise ValueError(
f"Schema for {granules[0]} is not recognized, must be an HTTP or S3 URL"
)
return None
if self.auth is None:
print(
raise ValueError(
"A valid Earthdata login instance is required to retrieve S3 credentials"
)
return None

if self.running_in_aws and granules[0].startswith("s3"):
if provider is not None:
Expand All @@ -405,27 +399,24 @@ def _open_urls(
fs=s3_fs,
threads=threads,
)
except Exception:
print(
"An exception occurred while trying to access remote files on S3: "
"This may be caused by trying to access the data outside the us-west-2 region"
except Exception as e:
raise RuntimeError(
"An exception occurred while trying to access remote files on S3. "
"This may be caused by trying to access the data outside the us-west-2 region."
f"Exception: {traceback.format_exc()}"
)
return None
) from e
else:
print(f"Provider {provider} has no valid cloud credentials")
return fileset
else:
print(
raise ValueError(
"earthaccess cannot derive the DAAC provider from URLs only, a provider is needed e.g. POCLOUD"
)
return None
else:
if granules[0].startswith("s3"):
print(
raise ValueError(
"We cannot open S3 links when we are not in-region, try using HTTPS links"
)
return None
fileset = self._open_urls_https(data_links, granules, threads)
return fileset

Expand All @@ -435,7 +426,7 @@ def get(
local_path: Optional[str] = None,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
"""Retrieves data granules from a remote storage system.
* If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda)
Expand Down Expand Up @@ -463,8 +454,7 @@ def get(
files = self._get(granules, local_path, provider, threads)
return files
else:
print("List of URLs or DataGranule isntances expected")
return None
raise ValueError("List of URLs or DataGranule isntances expected")

@singledispatchmethod
def _get(
Expand All @@ -473,7 +463,7 @@ def _get(
local_path: str,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
"""Retrieves data granules from a remote storage system.
* If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda)
Expand All @@ -491,8 +481,7 @@ def _get(
Returns:
None
"""
print("List of URLs or DataGranule isntances expected")
return None
raise NotImplementedError(f"Cannot _get {granules}")

@_get.register
def _get_urls(
Expand All @@ -501,15 +490,14 @@ def _get_urls(
local_path: str,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
data_links = granules
downloaded_files: List = []
if provider is None and self.running_in_aws and "cumulus" in data_links[0]:
print(
raise ValueError(
"earthaccess can't yet guess the provider for cloud collections, "
"we need to use one from earthaccess.list_cloud_providers()"
)
return None
if self.running_in_aws and data_links[0].startswith("s3"):
print(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
Expand All @@ -532,7 +520,7 @@ def _get_granules(
local_path: str,
provider: Optional[str] = None,
threads: int = 8,
) -> Union[None, List[str]]:
) -> List[str]:
data_links: List = []
downloaded_files: List = []
provider = granules[0]["meta"]["provider-id"]
Expand Down Expand Up @@ -615,13 +603,11 @@ def _download_onprem_granules(
:returns: None
"""
if urls is None:
print("The granules didn't provide a valid GET DATA link")
return None
raise ValueError("The granules didn't provide a valid GET DATA link")
if self.auth is None:
print(
raise ValueError(
"We need to be logged into NASA EDL in order to download data granules"
)
return []
if not os.path.exists(directory):
os.makedirs(directory)

Expand Down

0 comments on commit f01b311

Please sign in to comment.