Skip to content

Commit

Permalink
Merge pull request #24 from umr-lops/s3protocol
Browse files Browse the repository at this point in the history
S3protocol
  • Loading branch information
agrouaze authored Apr 18, 2024
2 parents 66d941a + 739859e commit 3070725
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ __pycache__/
.coverage.*
.cache
/docs/_build/
localconfig.yml
.idea
13 changes: 13 additions & 0 deletions highleveltests/open_SLC_IW.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from safe_s1 import Sentinel1Reader, getconfig
import time
# High-level smoke test: open the product whose path is stored under the
# 'nfs_iw_grd_path' configuration key and report how long the reader takes
# to be constructed.
conf = getconfig.get_config()
subswath = conf['nfs_iw_grd_path']
print(subswath)

# Only the construction of the Sentinel1Reader is timed; the datatree access
# below happens after the timer has been stopped.
t0 = time.time()
sub_reader = Sentinel1Reader(subswath)
elapse_t = time.time() - t0

dt = sub_reader.datatree
print('out of the reader')
print(dt)
# The path comes from the 'nfs_iw_grd_path' entry and no S3 access is made in
# this script, so the message must not claim that the SAFE was read through S3.
print('time to read the SAFE: %1.2f sec' % elapse_t)
36 changes: 36 additions & 0 deletions highleveltests/open_SLC_IW_S3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# see https://stackoverflow.com/questions/69624867/no-such-file-error-when-trying-to-create-local-cache-of-s3-object
from safe_s1 import Sentinel1Reader,getconfig
import pdb
import os
import time
import logging
import fsspec
logging.basicConfig(level=logging.INFO)
logging.info('test start')

# Credentials and endpoint are read from the package configuration.
conf = getconfig.get_config()
access_key = conf['access_key']
secret_key = conf['secret_key']
entrypoint_url = conf['entrypoint_url']
endpoint_url = 'https://' + entrypoint_url

# Explicit S3 filesystem handle; it is only used by the mapper-based variant
# in the ``else`` branch below.
s3 = fsspec.filesystem(
    "s3",
    anon=False,
    key=access_key,
    secret=secret_key,
    endpoint_url=endpoint_url,
)

# This URL syntax works: the content of the XML files can be read. For an SLC
# product, however, the subswath to decode would have to be specified.
# safe2 = 's3:///eodata/Sentinel-1/SAR/SLC/2019/10/13/S1B_IW_SLC__1SDV_20191013T155948_20191013T160015_018459_022C6B_13A2.SAFE'
safe2 = 's3:///eodata/Sentinel-1/SAR/IW_GRDH_1S/2024/04/18/S1A_IW_GRDH_1SSH_20240418T080141_20240418T080210_053485_067D74_C073.SAFE'
# safe2 = conf['s3_iw_grd_path']

# Selects how the S3 access parameters are handed to the reader.
option = 'kwargs'
if option == 'kwargs':
    # Pass the S3 parameters through ``backend_kwargs`` as storage options.
    client_kwargs = {
        "endpoint_url": endpoint_url,
        'aws_access_key_id': access_key,
        'aws_secret_access_key': secret_key,
    }
    storage_options = {"anon": False, "client_kwargs": client_kwargs}
    t0 = time.time()
    sub_reader = Sentinel1Reader(
        safe2,
        backend_kwargs={"storage_options": storage_options},
    )
    elapse_t = time.time() - t0
    print('time to read the SAFE through S3: %1.2f sec' % elapse_t)
else:
    # This variant is not supported. Observed failure:
    # botocore.errorfactory.NoSuchKey: An error occurred (NoSuchKey) when
    # calling the GetObject operation: Unknown
    sub_reader = Sentinel1Reader(s3.get_mapper(safe2))

dt = sub_reader.datatree
print('out of the reader')
print(dt)
23 changes: 23 additions & 0 deletions safe_s1/getconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import yaml
import os
import logging
import safe_s1
from pathlib import Path
# Determine which config file to use (config.yml by default, or localconfig.yml if one is present
# next to the package) and load its content.
def get_config():
    """Load the package configuration from YAML.

    A ``localconfig.yml`` located in the ``safe_s1`` package directory is
    used when it exists; otherwise the ``config.yml`` from the same directory
    is read.

    :return: the parsed content of the selected YAML file
    """
    package_dir = os.path.dirname(safe_s1.__file__)
    local_candidate = os.path.join(package_dir, 'localconfig.yml')
    logging.info('potential local config: %s', local_candidate)

    # Pick the file first, then parse it in a single place.
    if os.path.exists(local_candidate):
        logging.info('localconfig used')
        chosen_path = local_candidate
    else:
        logging.info('default config')
        chosen_path = Path(os.path.join(package_dir, 'config.yml'))

    with open(chosen_path) as stream:
        return yaml.load(stream, Loader=yaml.SafeLoader)

13 changes: 9 additions & 4 deletions safe_s1/metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import re

import pdb
import dask
import fsspec
import numpy as np
Expand All @@ -21,7 +21,7 @@ class Sentinel1Reader:

def __init__(self, name, backend_kwargs=None):
if not isinstance(name, (str, os.PathLike)):
raise ValueError(f"cannot deal with object of type {type(name)}: {name}")
raise ValueError(f"cannot deal with object of type {type(name)}: {name}")
# gdal dataset name
if not name.startswith('SENTINEL1_DS:'):
name = 'SENTINEL1_DS:%s:' % name
Expand All @@ -39,10 +39,13 @@ def __init__(self, name, backend_kwargs=None):
"""Dataset path"""
self.safe = os.path.basename(self.path)

self.path = os.fspath(self.path)

if backend_kwargs is None:
backend_kwargs = {}
self.path = os.fspath(self.path)

storage_options = backend_kwargs.get("storage_options", {})

mapper = fsspec.get_mapper(self.path, **storage_options)
self.xml_parser = XmlParser(
xpath_mappings=sentinel1_xml_mappings.xpath_mappings,
Expand Down Expand Up @@ -89,7 +92,6 @@ def __init__(self, name, backend_kwargs=None):
'geolocationGrid': None,
}
if not self.multidataset:

self._dict = {
'geolocationGrid': self.geoloc,
'orbit': self.orbit,
Expand All @@ -105,6 +107,9 @@ def __init__(self, name, backend_kwargs=None):
}
self.dt = datatree.DataTree.from_dict(self._dict)
assert self.dt==self.datatree
else:
print('multidataset')
raise Exception()

def load_digital_number(self, resolution=None, chunks=None, resampling=rasterio.enums.Resampling.rms):
"""
Expand Down
2 changes: 1 addition & 1 deletion safe_s1/sentinel1_xml_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def df_files(annotation_files, measurement_files, noise_files, calibration_files
def xsd_files_func(xsd_product_file):
"""
return a xarray Dataset with path of the different xsd files
:param xsd_product:
:param xsd_product: str
:return:
"""
ds = xr.Dataset()
Expand Down
23 changes: 4 additions & 19 deletions test/test_s1reader.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,15 @@
import safe_s1
from safe_s1 import sentinel1_xml_mappings, Sentinel1Reader
import os
from safe_s1 import sentinel1_xml_mappings, Sentinel1Reader, getconfig
import logging
from pathlib import Path
import yaml


logging.basicConfig()
logging.captureWarnings(True)

logger = logging.getLogger('s1_reader_test')
logger.setLevel(logging.DEBUG)


# determine the config file we will use (config.yml by default, and a local config if one is present) and retrieve
# the products names
local_config_pontential_path = Path(os.path.join('~', 'xarray-safe-s1', 'localconfig.yml')).expanduser()
if local_config_pontential_path.exists():
config_path = local_config_pontential_path
with open(config_path) as config_content:
products = yaml.load(config_content, Loader=yaml.SafeLoader)['product_paths']
else:
config_path = Path(os.path.join(os.path.dirname(safe_s1.__file__), 'config.yml'))
with open(config_path) as config_content:
raw_products = yaml.load(config_content, Loader=yaml.SafeLoader)['product_paths']
products = [sentinel1_xml_mappings.get_test_file(filename) for filename in raw_products]

conf = getconfig.get_config()
products = [sentinel1_xml_mappings.get_test_file(filename) for filename in conf['product_paths']]

# Try to apply the reader on different products
def test_reader():
Expand Down

0 comments on commit 3070725

Please sign in to comment.