From a160e983151bb62dfe46f3902dee207905303390 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Wed, 24 Jul 2024 15:12:52 -0600 Subject: [PATCH 1/3] Passthrough earthdata credential envvars to container --- compose.yml | 4 ++++ doc/operation.md | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/compose.yml b/compose.yml index 9f82149..4eb6d41 100644 --- a/compose.yml +++ b/compose.yml @@ -8,3 +8,7 @@ services: - "./database/:/app/database/" - "./plots/:/app/plots/" - "./baseline_datasets/:/app/baseline_datasets/:ro" + environment: + # NOTE: null value indicates passthrough from host. + EARTHDATA_USERNAME: null + EARTHDATA_PASSWORD: null diff --git a/doc/operation.md b/doc/operation.md index 6adc551..bf18329 100644 --- a/doc/operation.md +++ b/doc/operation.md @@ -43,6 +43,14 @@ These steps will use Docker, as we expect that to be the main operation mode. Se "Detailed steps" section below for examples of running the code without Docker. +### Setup + +``` +export EARTHDATA_USERNAME="myusername" +export EARTHDATA_PASSWORD="mypassword" +``` + + ### Initialize the database and generate all plots ```bash From 1a6542a2536cc5dd4604b414c6ded4f194aec583 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Wed, 24 Jul 2024 17:35:15 -0600 Subject: [PATCH 2/3] Read an external storage location from environment variable The `database` and `plots` directories are the only directories supported for now.
--- Dockerfile | 4 +- antarctica_today/__init__.py | 5 ++ antarctica_today/config.py | 41 ++++++++++++ antarctica_today/constants/paths.py | 16 ----- .../generate_antarctica_today_map.py | 17 +++-- .../generate_plots_for_given_day.py | 3 +- .../plot_daily_melt_and_climatology.py | 14 +++-- antarctica_today/tb_file_data.py | 6 +- compose.yml | 4 +- conda-lock.yml | 62 +++++++++++++------ doc/operation.md | 5 ++ environment.yml | 2 + 12 files changed, 124 insertions(+), 55 deletions(-) create mode 100644 antarctica_today/config.py diff --git a/Dockerfile b/Dockerfile index c68768d..032253c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,15 @@ FROM mambaorg/micromamba:1.5.8-alpine3.20 +VOLUME /data WORKDIR /app # Activate the conda environment during build process ARG MAMBA_DOCKERFILE_ACTIVATE=1 -COPY ./conda-lock.yml . +# Install dependencies # NOTE: `-p` is important to install to the "base" env +COPY ./conda-lock.yml . RUN micromamba install -y \ -p /opt/conda \ -f conda-lock.yml \ diff --git a/antarctica_today/__init__.py b/antarctica_today/__init__.py index 624a501..59c4fb9 100644 --- a/antarctica_today/__init__.py +++ b/antarctica_today/__init__.py @@ -1,6 +1,11 @@ import os import sys +from antarctica_today.config import Settings + +config = Settings() + + # IMPORTANT: If we don't specify this setting, then the projection we want to use will # be replaced with another (and this warning will be printed)! 
# diff --git a/antarctica_today/config.py b/antarctica_today/config.py new file mode 100644 index 0000000..d5ddd9e --- /dev/null +++ b/antarctica_today/config.py @@ -0,0 +1,41 @@ +from functools import cached_property + +from pydantic import DirectoryPath, computed_field +from pydantic_settings import BaseSettings, SettingsConfigDict + +from antarctica_today.constants.paths import REPO_DIR + + +class Settings(BaseSettings): + """Configuration required to download brightness temperature data.""" + + model_config = SettingsConfigDict( + env_prefix="ANTARCTICA_TODAY_", + env_file=".env", + extra="ignore", + ) + + # New data directories: + # - Keep data included in the repo separate from runtime data. We don't want to require + # a specific directory structure that may not work on every computer. For example, on + # NSIDC VMs, we have limited direct storage, and need to use mounts to access larger + # storage devices. + # - Use environment variables to enable override; in this case I think we only need one + # for the root storage directory. Default to an in-repo storage location so if the + # envvars are not populated, system pollution doesn't occur. 
+ # - Migrate more things iteratively :) + STORAGE_BASEDIR: DirectoryPath = REPO_DIR + + @computed_field # type:ignore[misc] + @cached_property + def db_dir(self) -> DirectoryPath: + directory = self.STORAGE_BASEDIR / "database" + directory.mkdir(parents=True, exist_ok=True) + return directory + + @computed_field # type:ignore[misc] + @cached_property + def plots_dir(self) -> DirectoryPath: + directory = self.STORAGE_BASEDIR / "plots" + directory.mkdir(parents=True, exist_ok=True) + return directory diff --git a/antarctica_today/constants/paths.py b/antarctica_today/constants/paths.py index a631ee8..46a013f 100644 --- a/antarctica_today/constants/paths.py +++ b/antarctica_today/constants/paths.py @@ -10,27 +10,11 @@ # The root of the Git repository (directory containing `.git`) REPO_DIR: Final = PACKAGE_DIR.parent -# New data directories: -# - Keep data included in the repo separate from runtime data. We don't want to require -# a specific directory structure that may not work on every computer. For example, on -# NSIDC VMs, we have limited direct storage, and need to use mounts to access larger -# storage devices. -# - Use environment variables to enable override; in this case I think we only need one -# for the root storage directory. Default to an in-repo storage location so if the -# envvars are not populated, system pollution doesn't occur. 
-# - Migrate more things iteratively :) -_default_storage_dir = REPO_DIR -STORAGE_DIR: Final = Path( - os.environ.get("ANTARCTICA_TODAY_STORAGE_DIR", _default_storage_dir) -) -DATA_DATABASE_DIR: Final = STORAGE_DIR / "database" -# DATA_OUTPUT_DIR: Final = STORAGE_DIR / "output" # Legacy data directories DATA_DIR: Final = REPO_DIR / "data" DATA_QGIS_DIR: Final = REPO_DIR / "qgis" DATA_TB_DIR: Final = REPO_DIR / "Tb" -DATA_PLOTS_DIR: Final = REPO_DIR / "plots" DATA_BASELINE_DATASETS_DIR: Final = REPO_DIR / "baseline_datasets" diff --git a/antarctica_today/generate_antarctica_today_map.py b/antarctica_today/generate_antarctica_today_map.py index ff98d70..6c14af9 100644 --- a/antarctica_today/generate_antarctica_today_map.py +++ b/antarctica_today/generate_antarctica_today_map.py @@ -23,12 +23,11 @@ from loguru import logger from osgeo import gdal -from antarctica_today import read_NSIDC_bin_file, write_NSIDC_bin_to_gtif +from antarctica_today import config, read_NSIDC_bin_file, write_NSIDC_bin_to_gtif from antarctica_today.compute_mean_climatology import ( create_partial_year_melt_anomaly_tif, read_annual_melt_anomaly_tif, ) -from antarctica_today.constants.paths import DATA_DIR, DATA_PLOTS_DIR # import svgclip from antarctica_today.map_filedata import ( @@ -67,7 +66,7 @@ def main(): # for fmt in ("png", "pdf", "svg"): fig, ax = m.generate_annual_melt_map( outfile_template=( - DATA_PLOTS_DIR + config.plots_dir / "annual_maps_sum" / f"R{region}_{year}-{year+1}_sum.{fmt}" ), @@ -83,7 +82,7 @@ def main(): fig, ax = m.generate_anomaly_melt_map( outfile_template=( - DATA_PLOTS_DIR + config.plots_dir / "annual_maps_anomaly" / f"R{region}_{year}-{year+1}_anomaly.{fmt}" ), @@ -97,7 +96,7 @@ def main(): plt.close(fig) - # fig, ax = m.generate_anomaly_melt_map(DATA_PLOTS_DIR / "annual_maps_anomaly/R0_2021-2022.04.30_text.png", + # fig, ax = m.generate_anomaly_melt_map(config.plots_dir / "annual_maps_anomaly/R0_2021-2022.04.30_text.png", # year=2021+1, # dpi=300, # 
include_scalebar=True, @@ -106,12 +105,12 @@ def main(): # reset_picklefile=False) # fig, ax = m.generate_daily_melt_map(DATA_DIR / "v2.5/antarctica_melt_S3B_2010-2020_20200129/antarctica_melt_20100101_S3B_20210129.bin", - # outfile = DATA_PLOTS_DIR / "v2.5/daily_maps/20100101_daily.jpg", dpi=150) + # outfile = config.plots_dir / "v2.5/daily_maps/20100101_daily.jpg", dpi=150) # print (m._get_current_axes_position(ax)) # for fmt in ("png", "svg"): - # m.generate_annual_melt_map(outfile_template=DATA_PLOTS_DIR / "v2.5/annual_maps/R{1}_{0}-{3}." + fmt, + # m.generate_annual_melt_map(outfile_template=config.plots_dir / "v2.5/annual_maps/R{1}_{0}-{3}." + fmt, # region_number=0, # year=2020, # dpi=600, @@ -121,7 +120,7 @@ def main(): # # include_current_date_label=True) # # m.generate_anomaly_melt_map(year="all", reset_picklefile=True) - # m.generate_anomaly_melt_map(outfile_template=DATA_PLOTS_DIR / "v2.5/anomaly_maps/R{1}_{0}-{3}." + fmt, + # m.generate_anomaly_melt_map(outfile_template=config.plots_dir / "v2.5/anomaly_maps/R{1}_{0}-{3}." 
+ fmt, # year=2020, # region_number=0, # message_below_year="through 16 February,\n relative to 1990-2020", @@ -130,7 +129,7 @@ def main(): # # reset_picklefile=True) # for melt_code in range(2,8+1): - # m.generate_cumulative_melt_map(outfile_template = DATA_PLOTS_DIR / "v2.5/annual_maps/{0}_region{1}_level{2}.jpg", + # m.generate_cumulative_melt_map(outfile_template = config.plots_dir / "v2.5/annual_maps/{0}_region{1}_level{2}.jpg", # melt_code_threshold=melt_code, # year="all") # # year=2015) diff --git a/antarctica_today/generate_plots_for_given_day.py b/antarctica_today/generate_plots_for_given_day.py index 752f2d9..1055844 100644 --- a/antarctica_today/generate_plots_for_given_day.py +++ b/antarctica_today/generate_plots_for_given_day.py @@ -7,6 +7,7 @@ import os import re import shutil +from pathlib import Path import dateutil.parser import matplotlib.pyplot @@ -105,7 +106,7 @@ def generate_maps_and_plots_for_a_date( region_num=region_num, gap_filled=True, dpi=dpi, - outfile=lineplot_outfile, + outfile=Path(lineplot_outfile), ) # Close the current plots open in matplotlib. (Keeps them from accumulating.) 
diff --git a/antarctica_today/plot_daily_melt_and_climatology.py b/antarctica_today/plot_daily_melt_and_climatology.py index d8774ce..4836495 100755 --- a/antarctica_today/plot_daily_melt_and_climatology.py +++ b/antarctica_today/plot_daily_melt_and_climatology.py @@ -7,6 +7,8 @@ import datetime import os +from pathlib import Path +from typing import Optional import matplotlib as mpl import matplotlib.pyplot as plt @@ -212,7 +214,7 @@ def plot_current_year_melt_over_baseline_stats( region_num=0, doy_start=(10, 1), doy_end=(4, 30), - outfile=None, + outfile: Optional[Path] = None, gap_filled=True, add_max_line=False, dpi=300, @@ -365,7 +367,7 @@ def _plot_current_year_and_baseline( current_year_percents, fraction_x, region_num=0, - outfile=None, + outfile: Optional[Path] = None, gap_filled=True, add_max_line=False, dpi=300, @@ -434,12 +436,14 @@ def _plot_current_year_and_baseline( # _add_region_area_at_bottom(fig, ax, region_number=region_num) if outfile: + outfile.parent.mkdir(parents=True, exist_ok=True) + if gap_filled and os.path.split(outfile)[1].find("gap_filled") == -1: base, ext = os.path.splitext(outfile) - outfile = base + "_gap_filled" + ext + outfile = Path(base + "_gap_filled" + ext) logger.debug(f"Plotting {outfile}") - if os.path.splitext(outfile)[1].lower() == ".eps": + if outfile.suffix.lower() == ".eps": fig.savefig(outfile, dpi=dpi, format="eps") else: fig.savefig(outfile, dpi=dpi) @@ -1266,7 +1270,7 @@ def special_figure_REG5_FEB_APR_2022(outfile): plot_current_year_melt_over_baseline_stats( current_date=datetime.datetime(year + 1, 4, 30), region_num=region_num, - outfile=fname, + outfile=Path(fname), dpi=1200, add_max_line=False, gap_filled=True, diff --git a/antarctica_today/tb_file_data.py b/antarctica_today/tb_file_data.py index 2bbbdcd..e64c1d6 100755 --- a/antarctica_today/tb_file_data.py +++ b/antarctica_today/tb_file_data.py @@ -9,11 +9,10 @@ import os +from antarctica_today import config from antarctica_today.constants.paths import ( 
DATA_BASELINE_DATASETS_DIR, - DATA_DATABASE_DIR, DATA_DIR, - DATA_PLOTS_DIR, DATA_TB_DIR, ) @@ -63,6 +62,7 @@ def recurse_directory(directory, ignore="thresholds", target=".bin", sorted=True NSIDC_0080_file_dir = DATA_TB_DIR / "nsidc-0080" +DATA_DATABASE_DIR = config.db_dir # TODO: More consistent names; "model_results" doesn't tell us much about where these # files live. These live in external storage, but others live in repo (for now). Pick # either "dir" or "folder". Do we need "v3" in the filenames? The code only supports one @@ -89,7 +89,7 @@ def recurse_directory(directory, ignore="thresholds", target=".bin", sorted=True model_results_v3_dir = DATA_DIR model_results_dir = model_results_v3_dir / "daily_melt_bin_files" -model_results_plot_directory = DATA_PLOTS_DIR +model_results_plot_directory = config.plots_dir # output_tifs_directory = os.path.join(model_results_v3_dir, "sample_results") outputs_annual_tifs_directory = os.path.join(model_results_v3_dir, "annual_sum_geotifs") outputs_annual_plots_directory = os.path.join( diff --git a/compose.yml b/compose.yml index 4eb6d41..ac63d11 100644 --- a/compose.yml +++ b/compose.yml @@ -3,12 +3,12 @@ services: image: "nsidc/antarctica_today:${ANTARCTICA_TODAY_VERSION:-latest}" user: "root" volumes: - - "./Tb/:/app/Tb/" - "./data/:/app/data/" - - "./database/:/app/database/" - "./plots/:/app/plots/" - "./baseline_datasets/:/app/baseline_datasets/:ro" + - "${ANTARCTICA_TODAY_STORAGE_BASEDIR:-/tmp/antarctica_today}:/data" environment: # NOTE: null value indicates passthrough from host. 
EARTHDATA_USERNAME: null EARTHDATA_PASSWORD: null + ANTARCTICA_TODAY_STORAGE_BASEDIR: "/data" diff --git a/conda-lock.yml b/conda-lock.yml index 4383a27..350ba2d 100644 --- a/conda-lock.yml +++ b/conda-lock.yml @@ -13,7 +13,7 @@ version: 1 metadata: content_hash: - linux-64: 5a8ecb85578447b396175b83bbbb9299310098579a3245564715c5d96ec659ba + linux-64: 2776cacc965c5d19b575a4bd2f43c9cca65bb10ec3b2184c661574caac7bb442 channels: - url: conda-forge used_env_vars: [] @@ -506,16 +506,16 @@ package: category: main optional: false - name: c-ares - version: 1.32.2 + version: 1.32.3 manager: conda platform: linux-64 dependencies: __glibc: '>=2.17,<3.0.a0' libgcc-ng: '>=12' - url: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.32.2-h4bc722e_0.conda + url: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.32.3-h4bc722e_0.conda hash: - md5: 8024af1ee7078e37fa3101c0a0296af2 - sha256: d1b01f9e3d10b97fd09e19fda0caf9bfad3c884a6b19fb3f654a9aed02a70b58 + md5: 7624e34ee6baebfc80d67bac76cc9d9d + sha256: 3c5a844bb60b0d52d89c3f1bd828c9856417fe33a6102fd8bbd5c13c3351704a category: main optional: false - name: ca-certificates @@ -1423,16 +1423,16 @@ package: category: main optional: false - name: importlib-metadata - version: 8.0.0 + version: 8.1.0 manager: conda platform: linux-64 dependencies: python: '>=3.8' zipp: '>=0.5' - url: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.0.0-pyha770c72_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.1.0-pyha770c72_0.conda hash: - md5: 3286556cdd99048d198f72c3f6f69103 - sha256: e40d7e71c37ec95df9a19d39f5bb7a567c325be3ccde06290a71400aab719cac + md5: 03da20bf8b1f1021102633d2e9cee84e + sha256: b56e95c96d0afb1efd39cca3801bb5dd41506ffe996a02263d1805353f9b248e category: main optional: false - name: importlib-resources @@ -1449,15 +1449,15 @@ package: category: main optional: false - name: importlib_metadata - version: 8.0.0 + version: 8.1.0 manager: conda platform: linux-64 dependencies: - 
importlib-metadata: '>=8.0.0,<8.0.1.0a0' - url: https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-8.0.0-hd8ed1ab_0.conda + importlib-metadata: '>=8.1.0,<8.1.1.0a0' + url: https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-8.1.0-hd8ed1ab_0.conda hash: - md5: 5f8c8ebbe6413a7838cf6ecf14d5d31b - sha256: f786f67bcdd6debb6edc2bc496e2899a560bbcc970e66727d42a805a1a5bf9a3 + md5: 1e7ab80593a1fdae4225f4df4fd150d0 + sha256: b816a3bbe911adfb9d906bd096e61817a4ea4defffff0485ab5a0c1d94a249ca category: main optional: false - name: importlib_resources @@ -2414,7 +2414,7 @@ package: category: main optional: false - name: mapclassify - version: 2.6.1 + version: 2.7.0 manager: conda platform: linux-64 dependencies: @@ -2424,10 +2424,10 @@ package: python: '>=3.9' scikit-learn: '>=1.0' scipy: '>=1.8' - url: https://conda.anaconda.org/conda-forge/noarch/mapclassify-2.6.1-pyhd8ed1ab_0.conda + url: https://conda.anaconda.org/conda-forge/noarch/mapclassify-2.7.0-pyhd8ed1ab_0.conda hash: - md5: 6aceae1ad4f16cf7b73ee04189947f98 - sha256: 204ab8b242229d422b33cfec07ea61cefa8bd22375a16658afbabaafce031d64 + md5: 014ab9453f9e9fb915937b46830d48e8 + sha256: 7b0be62b175db5cc36bcca1b995fccd4a22cd1ffdf612edc188f329087f048f7 category: main optional: false - name: markupsafe @@ -3037,6 +3037,20 @@ package: sha256: 203918a51383ab42161763317e44f505e2526aac4451613acae4d83633cf2676 category: main optional: false +- name: pydantic-settings + version: 2.3.4 + manager: conda + platform: linux-64 + dependencies: + pydantic: '>=2.7.0' + python: '>=3.8' + python-dotenv: '>=0.21.0' + url: https://conda.anaconda.org/conda-forge/noarch/pydantic-settings-2.3.4-pyhd8ed1ab_0.conda + hash: + md5: 1f18bd4881aa003740072706b832e855 + sha256: bf7536a6a5e2b7fb271921e7e4c7e0cb37cebb4f5323dd7047d5dd5fe6b7fdd3 + category: main + optional: false - name: pylev version: 1.4.0 manager: conda @@ -3156,6 +3170,18 @@ package: sha256: f3ceef02ac164a8d3a080d0d32f8e2ebe10dd29e3a685d240e38b3599e146320 category: 
main optional: false +- name: python-dotenv + version: 1.0.1 + manager: conda + platform: linux-64 + dependencies: + python: '>=3.8' + url: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.0.1-pyhd8ed1ab_0.conda + hash: + md5: c2997ea9360ac4e015658804a7a84f94 + sha256: 2d4c80364f03315d606a50eddd493dbacc078e21412c2462c0f781eec49b572c + category: main + optional: false - name: python_abi version: '3.11' manager: conda diff --git a/doc/operation.md b/doc/operation.md index bf18329..e5bbaed 100644 --- a/doc/operation.md +++ b/doc/operation.md @@ -48,8 +48,13 @@ These steps will use Docker, as we expect that to be the main operation mode. Se ``` export EARTHDATA_USERNAME="myusername" export EARTHDATA_PASSWORD="mypassword" +export ANTARCTICA_TODAY_STORAGE_BASEDIR="/path/to/storage" ``` +The storage directory specified here will be used to store large pickle files generated +by this code and imagery generated by this code. In the future, this configurable +storage directory will be used to store all data independently of the code repository. 
+ ### Initialize the database and generate all plots diff --git a/environment.yml b/environment.yml index c529f8a..4926f4b 100644 --- a/environment.yml +++ b/environment.yml @@ -10,6 +10,8 @@ dependencies: # -------------------- - click ~=8.1 - loguru ~=0.7.2 + - pydantic ~=2.8.2 + - pydantic-settings - earthaccess ~=0.10.0 - gdal ~=3.5 - pandas ~=1.4 From 7f12285395795356e6f3553bcfadb5ac5fd4b6f0 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Wed, 24 Jul 2024 18:15:40 -0600 Subject: [PATCH 3/3] Fixup incorrect type and check more functions --- antarctica_today/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/antarctica_today/main.py b/antarctica_today/main.py index 8428de3..9989aaf 100644 --- a/antarctica_today/main.py +++ b/antarctica_today/main.py @@ -7,6 +7,7 @@ import argparse import datetime import os +from pathlib import Path import click @@ -20,7 +21,7 @@ ) -def preprocessing_main(): +def preprocessing_main() -> None: """When we get new data (or new versions of the data), do all the things to get it ingested. 1) Read all the .bin files and put them into the array picklefile @@ -57,7 +58,7 @@ def preprocessing_main(): compute_mean_climatology.create_annual_melt_anomaly_tif(year, gap_filled=True) -def generate_all_plots_and_maps_main(): +def generate_all_plots_and_maps_main() -> None: """After all the preprocessing, re-generate all the plots and maps. 4) Re-run the climatology & daily-melt plots for each year. @@ -74,7 +75,7 @@ def generate_all_plots_and_maps_main(): plot_daily_melt_and_climatology.plot_current_year_melt_over_baseline_stats( datetime.datetime(year=year + 1, month=4, day=30), region_num=region, - outfile=fname, + outfile=Path(fname), ) # 5) Get a quick status check on the dates coverage.