Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add asf s3 orbits #66

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions eof/_asf_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import logging
from functools import cache
from typing import Optional, Literal

import requests
import xml.etree.ElementTree as ET

# NOTE(review): basicConfig() is executed at import time and configures the
# root logger of whatever application imports this module -- confirm that a
# library module is meant to do this rather than leaving it to the caller.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Name of the S3 bucket that get_orbit_files() passes to list_public_bucket().
ASF_BUCKET_NAME = "s1-orbits"


@cache
def list_public_bucket(bucket_name: str, prefix: str = "") -> list[str]:
    """List all object keys in a public S3 bucket.

    Pages through the bucket's unauthenticated REST listing using
    marker-based pagination until the service reports that the listing is no
    longer truncated.

    Results are memoized with ``functools.cache`` per distinct set of call
    arguments. Repeated calls return the *same* list object, so callers must
    treat the result as read-only.

    Parameters
    ----------
    bucket_name : str
        Name of the S3 bucket.
    prefix : str, optional
        Prefix to filter objects, by default "".

    Returns
    -------
    list[str]
        Object keys in the bucket, in the order the service returned them.

    Raises
    ------
    requests.RequestException
        If an HTTP request fails, times out, or returns an error status.
    xml.etree.ElementTree.ParseError
        If a response body is not well-formed XML.
    """
    # XML namespace used by every element of the S3 listing document.
    ns = "{http://s3.amazonaws.com/doc/2006-03-01/}"
    # Seconds to wait on the connection/response before giving up; without a
    # timeout a stalled connection would block the caller indefinitely.
    request_timeout = 60

    endpoint = f"https://{bucket_name}.s3.amazonaws.com"
    marker: Optional[str] = None
    keys: list[str] = []

    while True:
        params = {"prefix": prefix}
        if marker:
            params["marker"] = marker

        try:
            response = requests.get(
                endpoint, params=params, timeout=request_timeout
            )
            response.raise_for_status()
        except requests.RequestException as e:
            logger.error(f"Error fetching bucket contents: {e}")
            raise

        root = ET.fromstring(response.content)

        last_key: Optional[str] = None
        for contents in root.iterfind(f"{ns}Contents"):
            # findtext() yields None when <Key> is absent and "" when it is
            # present but empty, matching the original "text or ''" handling.
            key_text = contents.findtext(f"{ns}Key")
            if key_text is None:
                continue
            keys.append(key_text)
            last_key = key_text
            # Log the key string itself, not the Element object.
            logger.debug("Found key: %s", key_text)

        truncated_text = root.findtext(f"{ns}IsTruncated") or ""
        if truncated_text.strip().lower() != "true":
            break

        # NextMarker is only present in some responses; otherwise the last
        # key of the current page is the marker for the next one.
        next_marker = root.findtext(f"{ns}NextMarker") or last_key
        if not next_marker or next_marker == marker:
            # A missing or non-advancing marker would re-request the same
            # page forever, so stop instead of looping.
            logger.warning(
                "Listing of bucket %s is truncated but no usable marker was "
                "returned; stopping pagination",
                bucket_name,
            )
            break
        marker = next_marker

    return keys


def get_orbit_files(orbit_type: Literal["precise", "restituted"]) -> list[str]:
    """Return the bucket keys of all precise or restituted orbit files.

    The full key listing of ``ASF_BUCKET_NAME`` is obtained from
    ``list_public_bucket`` and narrowed to the keys whose name starts with
    the prefix matching the requested orbit type.

    Parameters
    ----------
    orbit_type : Literal["precise", "restituted"]
        Which kind of orbit file to select: "precise" selects keys starting
        with "AUX_POEORB", "restituted" selects keys starting with
        "AUX_RESORB".

    Returns
    -------
    list[str]
        Keys of the matching orbit files.

    Raises
    ------
    ValueError
        If `orbit_type` is neither "precise" nor "restituted".
    """
    # Validate first so that a bad argument never triggers a bucket listing.
    if orbit_type == "precise":
        wanted_prefix = "AUX_POEORB"
    elif orbit_type == "restituted":
        wanted_prefix = "AUX_RESORB"
    else:
        raise ValueError("orbit_type must be either 'precise' or 'restituted'")

    matching: list[str] = []
    for bucket_key in list_public_bucket(ASF_BUCKET_NAME):
        if bucket_key.startswith(wanted_prefix):
            matching.append(bucket_key)

    logger.info(f"Found {len(matching)} {orbit_type} orbit files")
    return matching
650 changes: 650 additions & 0 deletions eof/tests/cassettes/test_asf_s3/test_get_orbit_files.yaml

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions eof/tests/test_asf_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
from eof._asf_s3 import get_orbit_files


@pytest.mark.vcr()
def test_get_orbit_files():
    """Check that both orbit listings are non-empty and correctly prefixed.

    The test carries the ``vcr`` pytest marker.
    """
    precise = get_orbit_files("precise")
    restituted = get_orbit_files("restituted")

    # Both listings must contain at least one entry.
    assert precise, "No precise orbit files found"
    assert restituted, "No restituted orbit files found"

    # Every returned key must carry the prefix of the requested orbit type.
    misnamed_precise = [
        name for name in precise if not name.startswith("AUX_POEORB")
    ]
    assert not misnamed_precise, "Invalid precise orbit file name"

    misnamed_restituted = [
        name for name in restituted if not name.startswith("AUX_RESORB")
    ]
    assert not misnamed_restituted, "Invalid restituted orbit file name"
Loading