Skip to content

Commit

Permalink
Merge pull request #298 from openclimatefix/fix-non-hrv-filename
Browse files (browse the repository at this point in the history)
Put UTC back in; add an option to strip the "hrv_" prefix or not
  • Loading branch information
peterdudfield authored Sep 25, 2024
2 parents 94f8f25 + 28512b8 commit cb1d0ff
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
7 changes: 6 additions & 1 deletion satip/filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pandas as pd


def get_datetime_from_filename(filename: str) -> pd.Timestamp:
def get_datetime_from_filename(filename: str, strip_hrv: bool = False) -> pd.Timestamp:
"""Extract time from filename
For example:
Expand All @@ -14,12 +14,17 @@ def get_datetime_from_filename(filename: str) -> pd.Timestamp:

filename = filename.replace("iodc_", "")
filename = filename.replace("15_", "")

if strip_hrv:
filename = filename.replace("hrv_", "")

filename = filename.split(".zarr.zip")[0]
date = filename.split("/")[-1]

file_time = pd.to_datetime(
date,
format="%Y%m%d%H%M",
errors="ignore",
utc=True
)
return file_time
11 changes: 5 additions & 6 deletions satip/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,8 @@ def filter_dataset_ids_on_current_files(datasets: list, save_dir: str) -> list:
finished_files = finished_files_not_latest + finished_files_latest
log.debug(f"Found {len(finished_files)} already downloaded")

datetimes = [pd.Timestamp(eumetsat_filename_to_datetime(idx)).round("5 min") for idx in ids]
datetimes = [pd.Timestamp(eumetsat_filename_to_datetime(idx), tz='UTC').round("5 min")
for idx in ids]
if not datetimes: # Empty list
log.debug("No datetimes to download")
return []
Expand Down Expand Up @@ -858,9 +859,6 @@ def move_older_files_to_different_location(save_dir: str, history_time: pd.Times

filesystem = fsspec.open(save_dir).fs

# remove timezone from history_time
history_time = history_time.tz_localize(None)

# Now to move into latest
finished_files = filesystem.glob(f"{save_dir}/*.zarr.zip")

Expand All @@ -873,7 +871,8 @@ def move_older_files_to_different_location(save_dir: str, history_time: pd.Times
if "latest.zarr" in date or "tmp" in date:
continue

file_time = get_datetime_from_filename(date)
file_time = get_datetime_from_filename(date, strip_hrv=True)


if file_time > history_time:
log.debug(f"Moving file into {LATEST_DIR_NAME} folder")
Expand All @@ -896,7 +895,7 @@ def move_older_files_to_different_location(save_dir: str, history_time: pd.Times
if "latest.zarr" in date or "latest_15.zarr" in date or "tmp" in date:
continue

file_time = get_datetime_from_filename(date)
file_time = get_datetime_from_filename(date, strip_hrv=True)

if file_time < history_time:
log.debug(f"Moving file out of {LATEST_DIR_NAME} folder")
Expand Down
10 changes: 8 additions & 2 deletions tests/unit_test/test_filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@
def test_get_time_from_filename():

datetime = get_datetime_from_filename("folder/iodc_202408281115.zarr.zip")
assert datetime == pd.Timestamp("2024-08-28 11:15")
assert datetime == pd.Timestamp("2024-08-28 11:15", tz="UTC")

datetime = get_datetime_from_filename("folder/202006011205.zarr.zip")
assert datetime == pd.Timestamp("2020-06-01 12:05")
assert datetime == pd.Timestamp("2020-06-01 12:05", tz="UTC")

datetime = get_datetime_from_filename("folder/hrv_202408261815.zarr.zip")
assert datetime == "hrv_202408261815"

datetime = get_datetime_from_filename("folder/15_hrv_202408261815.zarr.zip")
assert datetime == "hrv_202408261815"

datetime = get_datetime_from_filename("folder/hrv_202408261815.zarr.zip", strip_hrv=True)
assert datetime == pd.Timestamp("2024-08-26 18:15", tz="UTC")

datetime = get_datetime_from_filename("folder/15_hrv_202408261815.zarr.zip", strip_hrv=True)
assert datetime == pd.Timestamp("2024-08-26 18:15", tz="UTC")

0 comments on commit cb1d0ff

Please sign in to comment.