Skip to content

Commit

Permalink
Merge pull request #231 from nasa/feature/issue-230-allow-single-file…
Browse files Browse the repository at this point in the history
…-netcdf-filepath-input

Feature/issue 230 allow single file netcdf filepath input
  • Loading branch information
danielfromearth authored Aug 13, 2024
2 parents 77435ea + c0efe9e commit e852dc3
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 18 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Common Changelog](https://common-changelog.org/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

### Changed
- Allow single netCDF file input in addition to single text file listings ([#230](https://github.com/nasa/stitchee/issues/230))([**@danielfromearth**](https://github.com/danielfromearth))


## [1.3.0] - 2024-07-11

### Changed
Expand Down
17 changes: 15 additions & 2 deletions concatenator/file_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

logger = logging.getLogger(__name__)

# File suffixes that mark a single input file as netCDF data. validate_input_path
# tests `Path.suffix in netcdf_extensions`, so the match is exact and case-sensitive.
netcdf_extensions = [".nc", ".nc4", ".netcdf"]


def add_label_to_path(x: str, label="_flat_intermediate") -> str:
"""Constructs new filepath with label at end."""
Expand Down Expand Up @@ -55,7 +57,15 @@ def validate_output_path(filepath: str, overwrite: bool = False) -> str:


def validate_input_path(path_or_paths: list[str]) -> list[str]:
"""Checks whether input is a valid directory, list of files, or a text file containing paths."""
"""Checks whether input is a list of files, a directory, or a text file containing paths.
If the input is...
- a list of filepaths, then use those filepaths.
- a valid directory, then get the paths for all the files in the directory.
- a single file:
- that is a valid text file, then get the names of the files from each row in the text file.
- that is a valid netCDF file, then use that one filepath
"""
print(f"parsed_input === {path_or_paths}")
if len(path_or_paths) > 1:
input_files = path_or_paths
Expand All @@ -64,7 +74,10 @@ def validate_input_path(path_or_paths: list[str]) -> list[str]:
if directory_or_path.is_dir():
input_files = _get_list_of_filepaths_from_dir(directory_or_path)
elif directory_or_path.is_file():
input_files = _get_list_of_filepaths_from_file(directory_or_path)
if directory_or_path.suffix in netcdf_extensions:
input_files = [str(directory_or_path)]
else:
input_files = _get_list_of_filepaths_from_file(directory_or_path)
else:
raise TypeError(
"If one path is provided for 'data_dir_or_file_or_filepaths', "
Expand Down
28 changes: 28 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
import numpy as np
import pytest

# Absolute path of the directory that contains this file.
test_path = Path(__file__).parents[0].resolve()
# Test-data locations, each nested one level inside the previous one:
# <test_path>/data/harmony/granules
data_path = test_path.joinpath("data")
harmony_path = data_path.joinpath("harmony")
granules_path = harmony_path.joinpath("granules")


class DataDirs(typing.NamedTuple):
test_path: Path
Expand Down Expand Up @@ -160,3 +165,26 @@ def ds_3dims_3vars_4coords_1group_part3(temp_toy_data_dir):
f.close()

return filepath


@pytest.fixture(scope="function")
def text_file_with_three_paths(temp_toy_data_dir) -> Path:
    """Create a text file that lists three granule filepaths, one per line.

    The file is written inside ``temp_toy_data_dir`` and its path is returned.
    Every listed path points into ``granules_path`` and is newline-terminated.
    """
    listing_path = temp_toy_data_dir / "text_file_with_paths.txt"

    granule_names = (
        "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4",
        "TEMPO_NO2_L2_V03_20240601T211614Z_S012G02_subsetted.nc4",
        "TEMPO_NO2_L2_V03_20240601T212254Z_S012G03_subsetted.nc4",
    )

    # One line per granule; the last line also ends with a newline.
    lines = [f"{path_str(granules_path, name)}\n" for name in granule_names]

    with open(listing_path, "w") as listing:
        listing.writelines(lines)

    return listing_path
57 changes: 41 additions & 16 deletions tests/unit/test_run_stitchee.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import sys
from pathlib import Path
from unittest.mock import patch

import pytest

import concatenator
from concatenator.run_stitchee import parse_args

from ..conftest import path_str
from ..conftest import granules_path, path_str


def test_parser():
Expand All @@ -24,23 +23,13 @@ def test_parser():

@pytest.mark.usefixtures("pass_options")
class TestBatching:
__test_path = Path(__file__).parents[1].resolve()
__data_path = __test_path.joinpath("data")
__harmony_path = __data_path.joinpath("harmony")
__granules_path = __harmony_path.joinpath("granules")

def test_run_stitchee_cli_with_three_filepaths(self, temp_output_dir):
test_args = [
concatenator.run_stitchee.__file__,
path_str(
self.__granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"
),
path_str(
self.__granules_path, "TEMPO_NO2_L2_V03_20240601T211614Z_S012G02_subsetted.nc4"
),
path_str(
self.__granules_path, "TEMPO_NO2_L2_V03_20240601T212254Z_S012G03_subsetted.nc4"
),
path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"),
path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T211614Z_S012G02_subsetted.nc4"),
path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T212254Z_S012G03_subsetted.nc4"),
"--copy_input_files",
"--verbose",
"-o",
Expand All @@ -57,7 +46,43 @@ def test_run_stitchee_cli_with_three_filepaths(self, temp_output_dir):
def test_run_stitchee_cli_with_one_directorypath(self, temp_output_dir):
test_args = [
concatenator.run_stitchee.__file__,
str(self.__granules_path),
str(granules_path),
"--copy_input_files",
"--verbose",
"-o",
path_str(temp_output_dir, "test_run_stitchee_output.nc"),
"--concat_method",
"xarray-concat",
"--concat_dim",
"mirror_step",
]

with patch.object(sys, "argv", test_args):
concatenator.run_stitchee.main()

def test_run_stitchee_cli_with_one_netCDFpath(self, temp_output_dir):
    """Run the stitchee command-line entry point with one netCDF file as the only input."""
    script = concatenator.run_stitchee.__file__
    single_granule = path_str(
        granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"
    )
    output_file = path_str(temp_output_dir, "test_run_stitchee_output.nc")

    cli_args = [script, single_granule]
    cli_args += ["--copy_input_files", "--verbose"]
    cli_args += ["-o", output_file]
    cli_args += ["--concat_method", "xarray-concat"]
    cli_args += ["--concat_dim", "mirror_step"]

    # Swap in the argument vector so main() sees it as sys.argv.
    with patch.object(sys, "argv", cli_args):
        concatenator.run_stitchee.main()

def test_run_stitchee_cli_with_one_path_to_text_listing_of_three_files(
self, temp_output_dir, text_file_with_three_paths
):
test_args = [
concatenator.run_stitchee.__file__,
str(text_file_with_three_paths),
"--copy_input_files",
"--verbose",
"-o",
Expand Down

0 comments on commit e852dc3

Please sign in to comment.