merge develop into issue-169 branch
danielfromearth committed Jul 10, 2024
2 parents 10c086a + d087507 commit 01b73ea
Showing 34 changed files with 1,251 additions and 556 deletions.
8 changes: 8 additions & 0 deletions .github/dependabot.yml
@@ -4,11 +4,19 @@ updates:
directory: "/"
schedule:
interval: "monthly"
groups:
pip-dependencies:
patterns:
- "*"
# Raise pull requests for version updates
# to pip against the `develop` branch
target-branch: "develop"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
groups:
gha-dependencies:
patterns:
- "*"
target-branch: "develop"
2 changes: 1 addition & 1 deletion .github/workflows/build-pipeline.yml
@@ -170,7 +170,7 @@ jobs:
- name: Build and push Docker image
if: ${{ !startsWith(github.ref, 'refs/heads/main/') }}
id: docker-push
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
with:
context: .
file: Dockerfile
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yml
@@ -36,7 +36,7 @@ jobs:
poetry-version: ${{ env.POETRY_VERSION }}

- name: Install package
run: poetry install
run: poetry install --with=harmony --without integration

- name: Run linting
run: |
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -1,4 +1,10 @@
---
ci:
autoupdate_schedule: "monthly" # Like dependabot
autoupdate_commit_msg: "chore: update pre-commit hooks"
autoupdate_branch: "develop"
autofix_prs: false # Comment "pre-commit.ci autofix" on a PR to trigger

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -12,7 +12,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [Issue #181](https://github.com/nasa/stitchee/issues/181): Add a group delimiter argument
- [Issue #134](https://github.com/nasa/stitchee/issues/134): Add an integration test that runs stitchee on files first subsetted by the operational Harmony subsetter
- [Issue #194](https://github.com/nasa/stitchee/issues/194): Add page about the SAMBAH service chain to the Readthedocs documentation
- [issue #193](https://github.com/nasa/stitchee/issues/193): Add autoupdate schedule for pre-commit
### Changed
- [Issue #206](https://github.com/nasa/stitchee/issues/206): Group dependabot updates into one PR
- [issue #208](https://github.com/nasa/stitchee/issues/208): Increase continuous integration/unit test coverage
- [issue #198](https://github.com/nasa/stitchee/issues/198): Use time variable instead of concat dim for ordering datasets
### Deprecated
### Removed
### Fixed
15 changes: 0 additions & 15 deletions concatenator/__init__.py
@@ -24,24 +24,9 @@ def __getattr__(name): # type: ignore
"""
global _options

if name == "__options__":
return _options
if name == "group_delim":
return _options.group_delim
if name == "coord_delim":
return _options.coord_delim
else:
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


def __setattr__(name, value): # type: ignore
"""Module-level setattr to handle setting of `concatenator.options`.
Other unhandled attributes raise as `AttributeError` as expected.
"""
if name == "group_delim":
_options.group_delim = value
elif name == "coord_delim":
_options.coord_delim = value
else:
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
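
For context (this is not part of the commit's diff): the module above relies on PEP 562 module-level attribute hooks. A minimal sketch of that pattern, with assumed `_Options` fields and default delimiter values, looks like this:

```python
# Minimal sketch of the PEP 562 module-level __getattr__ pattern used by
# concatenator/__init__.py. The _Options fields and their defaults here are
# assumptions for illustration, not the package's exact definitions.
from dataclasses import dataclass


@dataclass
class _Options:
    group_delim: str = "__"
    coord_delim: str = "  "


_options = _Options()


def __getattr__(name):  # type: ignore
    """Resolve module attributes such as `group_delim` dynamically from `_options`."""
    if name == "group_delim":
        return _options.group_delim
    if name == "coord_delim":
        return _options.coord_delim
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```

With the `__setattr__` hook deleted by this commit, reads such as `concatenator.group_delim` and `concatenator.coord_delim` (used in `attribute_handling.py` below) appear to still resolve through `_options`, while write handling is no longer intercepted at the module level.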
24 changes: 14 additions & 10 deletions concatenator/attribute_handling.py
@@ -26,7 +26,7 @@ def regroup_coordinate_attribute(attribute_string: str) -> str:
Examples
--------
>>> coord_att = "__Time_and_Position__time __Time_and_Position__instrument_fov_latitude __Time_and_Position__instrument_fov_longitude"
>>> _flatten_coordinate_attribute(coord_att)
>>> flatten_string_with_groups(coord_att)
Time_and_Position/time Time_and_Position/instrument_fov_latitude Time_and_Position/instrument_fov_longitude
Parameters
@@ -54,44 +54,48 @@ def regroup_coordinate_attribute(attribute_string: str) -> str:
def flatten_coordinate_attribute_paths(
dataset: netCDF4.Dataset, var: netCDF4.Variable, variable_name: str
) -> None:
"""Flatten the paths of variables referenced in the coordinates attribute."""
"""Flatten the paths of variables referenced in the 'coordinates' attribute."""
if "coordinates" in var.ncattrs():
coord_att = var.getncattr("coordinates")

new_coord_att = _flatten_coordinate_attribute(coord_att)
new_coord_att = flatten_string_with_groups(coord_att)

dataset.variables[variable_name].setncattr("coordinates", new_coord_att)


def _flatten_coordinate_attribute(attribute_string: str) -> str:
"""Converts attributes that specify group membership via "/" to use new group delimiter, even for the root level.
def flatten_string_with_groups(str_with_groups: str) -> str:
"""Determine separator and flatten string specifying group membership via "/".
Applies to variable paths or attributes, even for the root level.
Examples
--------
>>> coord_att = "Time_and_Position/time Time_and_Position/instrument_fov_latitude Time_and_Position/instrument_fov_longitude"
>>> _flatten_coordinate_attribute(coord_att)
>>> flatten_string_with_groups(coord_att)
__Time_and_Position__time __Time_and_Position__instrument_fov_latitude __Time_and_Position__instrument_fov_longitude
Parameters
----------
attribute_string : str
str_with_groups : str
Returns
-------
str
"""
# Use the separator that's in the attribute string only if all separators in the string are the same.
# Otherwise, we will use our own default separator.
whitespaces = re.findall(r"\s+", attribute_string)
if len(set(whitespaces)) <= 1:
whitespaces = re.findall(r"\s+", str_with_groups)
if len(set(whitespaces)) == 0:
new_sep = ""
elif len(set(whitespaces)) == 1:
new_sep = whitespaces[0]
else:
new_sep = concatenator.coord_delim

# A new string is constructed.
return new_sep.join(
f'{concatenator.group_delim}{c.replace("/", concatenator.group_delim)}'
for c in attribute_string.split() # split on any whitespace
for c in str_with_groups.split() # split on any whitespace
)


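A self-contained sketch of the separator logic this hunk introduces, with delimiters hard-coded as stand-ins for `concatenator.group_delim` and `concatenator.coord_delim`:

```python
import re

GROUP_DELIM = "__"  # assumed stand-in for concatenator.group_delim
COORD_DELIM = "  "  # assumed stand-in for concatenator.coord_delim


def flatten_string_with_groups_sketch(str_with_groups: str) -> str:
    # Reuse the string's own separator only if it is uniform throughout;
    # fall back to the default when separators are mixed or absent.
    whitespaces = re.findall(r"\s+", str_with_groups)
    if len(set(whitespaces)) == 0:
        new_sep = ""  # a single path: nothing to join
    elif len(set(whitespaces)) == 1:
        new_sep = whitespaces[0]
    else:
        new_sep = COORD_DELIM
    return new_sep.join(
        f'{GROUP_DELIM}{p.replace("/", GROUP_DELIM)}' for p in str_with_groups.split()
    )


assert flatten_string_with_groups_sketch("a/b c/d") == "__a__b __c__d"
assert flatten_string_with_groups_sketch("a/b") == "__a__b"
```

The new `len(set(whitespaces)) == 0` branch is what lets a single variable path (no whitespace at all) pass through the same function as a multi-path `coordinates` attribute.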
9 changes: 5 additions & 4 deletions concatenator/dataset_and_group_handling.py
@@ -8,6 +8,7 @@
from __future__ import annotations

import re
from logging import Logger

import netCDF4 as nc
import numpy as np
@@ -273,7 +274,7 @@ def _get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
return nested_group


def _calculate_chunks(dim_sizes: list, default_low_dim_chunksize=4000) -> tuple:
def _calculate_chunks(dim_sizes: list, default_low_dim_chunksize: int = 4000) -> tuple:
"""
For the given dataset, calculate if the size on any dimension is
worth chunking. Any dimension larger than 4000 will be chunked. This
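
One plausible reading of the (truncated) docstring above, as a hedged sketch only; the function body is not shown in this diff and the actual implementation may differ:

```python
# Hedged sketch: chunk any dimension larger than the threshold at the
# threshold size, and leave smaller dimensions whole.
def _calculate_chunks_sketch(dim_sizes: list, default_low_dim_chunksize: int = 4000) -> tuple:
    return tuple(min(size, default_low_dim_chunksize) for size in dim_sizes)
```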
@@ -324,8 +325,8 @@ def _get_dimension_size(dataset: nc.Dataset, dim_name: str) -> int:
return dim_size


def validate_workable_files(files, logger) -> tuple[list[str], int]:
"""Remove files from list that are not open-able as netCDF or that are empty."""
def validate_workable_files(files: list[str], logger: Logger) -> tuple[list[str], int]:
"""Remove files from a list that are not open-able as netCDF or that are empty."""
workable_files = []
for file in files:
try:
@@ -336,7 +337,7 @@ def validate_workable_files(files, logger) -> tuple[list[str], int]:
except OSError:
logger.debug("Error opening <%s> as a netCDF dataset. Skipping.", file)

# addressing the issue 153: propagate first empty file if all input files are empty
# addressing GitHub issue 153: propagate the first empty file if all input files are empty
if (len(workable_files)) == 0 and (len(files) > 0):
workable_files.append(files[0])

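A hypothetical usage sketch of the newly typed `validate_workable_files` (file paths are illustrative):

```python
import logging

from concatenator.dataset_and_group_handling import validate_workable_files

logger = logging.getLogger("stitchee")
candidate_files = ["granule_1.nc", "granule_2.nc"]  # illustrative paths

workable_files, number_of_workable = validate_workable_files(candidate_files, logger)
# Per the GitHub issue 153 handling above: if every input is empty, the first
# file is kept so an empty result can still be propagated downstream.
```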
1 change: 1 addition & 0 deletions concatenator/harmony/cli.py
@@ -1,4 +1,5 @@
"""A Harmony CLI wrapper around the concatenate-batcher"""

from argparse import ArgumentParser

import harmony
2 changes: 1 addition & 1 deletion concatenator/harmony/download_worker.py
@@ -113,6 +113,6 @@ def _download_worker(
dest_path = path.parent.joinpath(filename)
path = path.rename(dest_path)
else:
logger.warning("Origin filename could not be assertained - %s", url)
logger.warning("Origin filename could not be ascertained - %s", url)

path_list.append(str(path))
10 changes: 7 additions & 3 deletions concatenator/stitchee.py
@@ -15,6 +15,7 @@
import xarray as xr

import concatenator
from concatenator.attribute_handling import flatten_string_with_groups
from concatenator.dataset_and_group_handling import (
flatten_grouped_dataset,
regroup_flattened_dataset,
@@ -39,6 +40,7 @@ def stitchee(
concat_method: str | None = "xarray-concat",
concat_dim: str = "",
concat_kwargs: dict | None = None,
time_variable: str = "geolocation/time",
history_to_append: str | None = None,
copy_input_files: bool = False,
overwrite_output_file: bool = False,
@@ -137,9 +139,11 @@
decode_coords=False,
drop_variables=coord_vars,
) as xrds:
first_value = xrds[concatenator.group_delim + concat_dim].values.flatten()[
0
]
# Determine value for later dataset sorting.
first_value = xrds[
flatten_string_with_groups(time_variable)
].values.flatten()[0]
# first_value = xrds[concatenator.group_delim + concat_dim].values.flatten()[0]
concat_dim_order.append(first_value)

benchmark_log["flattening"] = time.time() - start_time
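A hedged sketch of the ordering step this hunk introduces (see CHANGELOG issue #198): record each granule's first time value from the new `time_variable` argument, then sort the inputs by those values before concatenating. File names below are illustrative; `flatten_string_with_groups("geolocation/time")` yields the flattened variable name (e.g. `__geolocation__time` under an assumed `__` group delimiter).

```python
import xarray as xr

from concatenator.attribute_handling import flatten_string_with_groups

time_variable = "geolocation/time"
flattened_time = flatten_string_with_groups(time_variable)

concat_dim_order = []
for path in ["granule_a_flat.nc", "granule_b_flat.nc"]:  # illustrative flattened files
    with xr.open_dataset(path, decode_times=False, decode_coords=False) as xrds:
        # Record the first time value for later dataset sorting.
        concat_dim_order.append(xrds[flattened_time].values.flatten()[0])

# Indices of the input files in ascending first-time order.
sorted_indices = sorted(range(len(concat_dim_order)), key=concat_dim_order.__getitem__)
```

Sorting on the time variable rather than on `concat_dim` means datasets are ordered correctly even when the concatenation dimension itself carries no meaningful ordering.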
