Skip to content

Commit

Permalink
Migrate common model handling to boilercore
Browse files Browse the repository at this point in the history
  • Loading branch information
blakeNaccarato committed Aug 15, 2023
1 parent a55657e commit 75bacfe
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 197 deletions.
3 changes: 2 additions & 1 deletion .tools/requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ pyqtgraph==0.13.3
pyside6==6.5.2
pytz==2023.3
pyyaml==6.0.1
ruamel.yaml==0.17.32
scikit-image==0.21.0
scipy==1.11.1
seaborn==0.12.2
Expand All @@ -38,3 +37,5 @@ numpy==1.24.4
boilercine@git+https://github.com/blakeNaccarato/boilercine@main
# ! Pin the merge commit in master once it is merged
CMasher@git+https://github.com/1313e/CMasher@mpl_future_proofing
# ! boilercore is specified differently in dev and CI so that dev can use an editable install
# boilercore==...
8 changes: 8 additions & 0 deletions .tools/requirements/requirements_ci.txt
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
# * -------------------------------------------------------------------------------- * #
# * Changes below should persist in significant template updates.

boilercore@git+https://github.com/blakeNaccarato/boilercore@8229c65f8e31aa22fe3316cd07a394c036d2dcc9

# * -------------------------------------------------------------------------------- * #
# * Changes below may be lost in significant template updates.

-r requirements_both.txt
2 changes: 2 additions & 0 deletions .tools/requirements/requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# * -------------------------------------------------------------------------------- * #
# * Changes below should persist in significant template updates.

-e ../boilercore

# * -------------------------------------------------------------------------------- * #
# * Changes below may be lost in significant template updates.

Expand Down
8 changes: 6 additions & 2 deletions .tools/requirements/requirements_docs.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# * -------------------------------------------------------------------------------- * #
# * Changes below should persist in significant template updates.

sphinxcontrib-bibtex==2.5.0
sphinxcontrib-mermaid==0.9.2

# * -------------------------------------------------------------------------------- * #
# * Changes below may be lost in significant template updates.

myst-parser==2.0.0
myst-nb==0.17.2
nbformat==5.9.2
sphinx-autobuild==2021.03.14
sphinx-book-theme==1.0.1
sphinx-design==0.5.0
sphinx-design==0.4.1
56 changes: 7 additions & 49 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -134,38 +134,28 @@
"problemMatcher": []
},
{
"label": "dvc: repro force downstream (debug)",
"label": "dvc: repro (debug)",
"dependsOrder": "sequence",
"dependsOn": ["dvc: pre-repro"],
"type": "shell",
"options": {
"shell": { "executable": "pwsh", "args": ["-Command"] },
"env": { "BOILERCV_DEBUG": "True" }
},
"command": "dvc repro --force --downstream ${input:stage}",
"command": "dvc repro ${input:stage}",
"icon": { "id": "graph" },
"problemMatcher": []
},
{
"label": "dvc: repro (debug)",
"label": "dvc: repro force (debug)",
"dependsOrder": "sequence",
"dependsOn": ["dvc: pre-repro"],
"type": "shell",
"options": {
"shell": { "executable": "pwsh", "args": ["-Command"] },
"env": { "BOILERCV_DEBUG": "True" }
},
"command": "dvc repro ${input:stage}",
"icon": { "id": "graph" },
"problemMatcher": []
},
{
"label": "dvc: repro force downstream",
"dependsOrder": "sequence",
"dependsOn": ["dvc: pre-repro"],
"type": "shell",
"options": { "shell": { "executable": "pwsh", "args": ["-Command"] } },
"command": "dvc repro --force --downstream ${input:stage}",
"command": "dvc repro --force ${input:stage}",
"icon": { "id": "graph" },
"problemMatcher": []
},
Expand All @@ -180,36 +170,12 @@
"problemMatcher": []
},
{
"label": "dvc: repro force all",
"dependsOrder": "sequence",
"dependsOn": [
"setup: Update project",
"dvc: pre-repro",
"dvc: freeze",
"git: Stage all files",
"pre-commit",
"git: Stage all files"
],
"type": "shell",
"options": { "shell": { "executable": "pwsh", "args": ["-Command"] } },
"command": "dvc repro --force",
"icon": { "id": "graph" },
"problemMatcher": []
},
{
"label": "dvc: repro all",
"label": "dvc: repro force",
"dependsOrder": "sequence",
"dependsOn": [
"setup: Update project",
"dvc: pre-repro",
"dvc: freeze",
"git: Stage all files",
"pre-commit",
"git: Stage all files"
],
"dependsOn": ["dvc: pre-repro"],
"type": "shell",
"options": { "shell": { "executable": "pwsh", "args": ["-Command"] } },
"command": "dvc repro",
"command": "dvc repro --force ${input:stage}",
"icon": { "id": "graph" },
"problemMatcher": []
},
Expand All @@ -231,14 +197,6 @@
"icon": { "id": "graph" },
"problemMatcher": []
},
{
"label": "dvc: freeze",
"type": "shell",
"options": { "shell": { "executable": "pwsh", "args": ["-Command"] } },
"command": "pip freeze --requirement .tools/requirements/requirements.txt --local --exclude-editable > repro.txt",
"icon": { "id": "graph" },
"problemMatcher": []
},
// * -------------------------------------------------------------------------- * //
// * WSL-SPECIFIC * //
{
Expand Down
11 changes: 8 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ license = { file = "LICENSE" }
requires-python = ">=3.11"
classifiers = ["License :: OSI Approved :: MIT License"]
dependencies = [
"boilercore>=0.0.0",
"colorcet>=3.0.1",
"dulwich>=0.21.5",
"dvc[gs]>=3.10.1",
Expand All @@ -41,7 +42,6 @@ dependencies = [
"pyside6>=6.5.2",
"pytz>=2023.3",
"pyyaml>=6.0.1",
"ruamel.yaml>=0.17.32",
"scikit-image>=0.21.0",
"scipy>=1.11.1",
"seaborn>=0.12.2",
Expand All @@ -56,7 +56,8 @@ dependencies = [
# ! Consider switching to pims instead
"boilercine@git+https://github.com/blakeNaccarato/boilercine@main",
# ! Pin the merge commit in master once it is merged
"CMasher@git+https://github.com/1313e/CMasher@mpl_future_proofing",
# ? https://github.com/1313e/CMasher/tree/mpl_future_proofing
"CMasher@git+https://github.com/1313e/CMasher@18d50f4deddf2410a4c51475890329d445fd7e1b",
]
[[project.authors]]
name = "Blake Naccarato, Kwang Jin Kim"
Expand All @@ -69,8 +70,12 @@ Home = "https://github.com/blakeNaccarato/boilercv"
Tracker = "https://github.com/blakeNaccarato/boilercv/issues"

[tool.fawltydeps]
ignore_undeclared = ["pytest"]
ignore_undeclared = [
"boilercore", # Not picked up by fawltydeps
"pytest",
]
ignore_unused = [
"boilercore", # Not picked up by fawltydeps
"nbconvert", # boilercv.pre_repro.export_notebook
"numba", # Pin numba until it supports numpy>=1.25
"pandas-stubs", # Provides stubs for pandas
Expand Down
140 changes: 0 additions & 140 deletions src/boilercv/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,141 +1 @@
"""Parameter models for this project."""

from pathlib import Path
from typing import Any

from pydantic import BaseModel, Extra, validator
from ruamel.yaml import YAML

# Indent width passed to the YAML handler and to the JSON schema writer.
YAML_INDENT = 2


class YamlModel(BaseModel):
    """Model of a YAML file with automatic schema generation.

    Updates a JSON schema next to the YAML file with each initialization.
    """

    class Config:
        extra = Extra.forbid

    def __init__(self, data_file: Path):
        """Initialize and update the schema.

        Args:
            data_file: YAML file holding the parameters for this model.
        """
        params = self.get_params(data_file)
        self.update_schema(data_file)
        super().__init__(**params)

    def get_params(self, data_file: Path) -> dict[str, Any]:
        """Get parameters from file.

        Args:
            data_file: YAML file to read.

        Returns:
            The loaded mapping, or an empty dict if the file is missing, empty, or
            holds no YAML document (e.g. only whitespace or comments).
        """
        if not (data_file.exists() and data_file.read_text(encoding="utf-8")):
            return {}
        yaml = YAML()
        yaml.indent(YAML_INDENT)
        loaded = yaml.load(data_file)
        # A file with only whitespace or comments loads as None, which cannot be
        # unpacked into keyword arguments by the caller.
        return {} if loaded is None else loaded

    def update_schema(self, data_file: Path):
        """Write this model's JSON schema to `<stem>_schema.json` beside the file.

        Args:
            data_file: YAML file whose name and location determine the schema path.
        """
        schema_file = data_file.with_name(f"{data_file.stem}_schema.json")
        schema_file.write_text(
            encoding="utf-8", data=f"{self.schema_json(indent=YAML_INDENT)}\n"
        )


class SynchronizedPathsYamlModel(YamlModel):
    """Model of a YAML file that synchronizes paths back to the file.

    For example, synchronize complex path structures back to `params.yaml` DVC files for
    pipeline orchestration.
    """

    def __init__(self, data_file: Path):
        """Initialize, update the schema, and synchronize paths in the file.

        Args:
            data_file: YAML file holding the parameters for this model.
        """
        super().__init__(data_file)

    def get_params(self, data_file: Path) -> dict[str, Any]:
        """Get parameters from file, synchronizing paths in the file.

        The paths returned by `get_paths` are merged over the loaded parameters and
        the merged mapping is written back to `data_file`.

        Args:
            data_file: YAML file to read and then rewrite.

        Returns:
            The merged parameters.
        """
        yaml = YAML()
        yaml.indent(YAML_INDENT)
        params = None
        if data_file.exists() and data_file.read_text(encoding="utf-8"):
            params = yaml.load(data_file)
        # A missing or empty file, or one with only whitespace or comments, yields
        # no mapping; start from an empty one so the merge below cannot fail on None.
        if params is None:
            params = {}
        params |= self.get_paths()
        yaml.dump(params, data_file)
        return params

    def get_paths(self) -> dict[str, dict[str, str]]:
        """Get all paths specified in paths-type models.

        Returns:
            A mapping from each non-excluded field whose type derives from
            `DefaultPathsModel` to the result of that type's `get_paths`.
        """
        maybe_excludes = self.__exclude_fields__
        excludes = set(maybe_excludes.keys()) if maybe_excludes else set()
        defaults: dict[str, dict[str, str]] = {}
        for key, field in self.__fields__.items():
            type_ = field.type_
            # `issubclass` raises TypeError for non-class annotations (e.g. Literal
            # or Union), so skip those rather than crash on unrelated fields.
            if (
                isinstance(type_, type)
                and issubclass(type_, DefaultPathsModel)
                and key not in excludes
            ):
                defaults[key] = type_.get_paths()
        return defaults


class DefaultPathsModel(BaseModel):
    """Base model in which every field is path-like and declares a default."""

    class Config:
        extra = Extra.forbid

        @staticmethod
        def schema_extra(schema: dict[str, Any], model):
            """Rewrite schema defaults so paths use forward slashes.

            Raises:
                TypeError: If the schema lists required fields (i.e. a default is
                    missing) or a field's type is not a `Path` subclass.
            """
            if schema.get("required"):
                raise TypeError(
                    f"Defaults must be specified in {model}, derived from {DefaultPathsModel}."
                )
            field_types = [
                model_field.type_ for model_field in model.__fields__.values()
            ]
            # Schema properties and model fields are walked in lockstep; `strict`
            # makes a length mismatch an error instead of silently truncating.
            for (field, prop), type_ in zip(
                schema["properties"].items(), field_types, strict=True
            ):
                if not issubclass(type_, Path):
                    raise TypeError(
                        f"Field <{field}> is not Path-like in {model}, derived from {DefaultPathsModel}."
                    )
                original = prop.get("default")
                if isinstance(original, (list, tuple)):
                    normalized = [item.replace("\\", "/") for item in original]
                elif isinstance(original, dict):
                    normalized = {
                        name: item.replace("\\", "/")
                        for name, item in original.items()
                    }
                else:
                    normalized = original.replace("\\", "/")
                prop["default"] = normalized

    @validator("*", always=True, pre=True, each_item=True)
    def check_pathlike(cls, value, field):
        """Reject any field whose declared type is not a `Path` subclass."""
        if issubclass(field.type_, Path):
            return value
        raise TypeError(
            f"Field is not Path-like in {cls}, derived from {DefaultPathsModel}."
        )

    @classmethod
    def get_paths(cls) -> dict[str, Any]:
        """Return each field's default as recorded in this model's schema."""
        properties = cls.schema()["properties"]
        return {name: prop["default"] for name, prop in properties.items()}


class CreatePathsModel(DefaultPathsModel):
    """Paths model that creates the directories its field values refer to."""

    @validator("*", always=True, pre=True, each_item=True)
    def create_directories(cls, value):
        """Create the directory for a value and return the value unchanged.

        Nothing is created when the value is an existing file. Otherwise a value
        with a suffix has its parent directory created, and a value without a
        suffix is itself created as a directory.
        """
        target = Path(value)
        if not target.is_file():
            folder = target.parent if target.suffix else target
            folder.mkdir(parents=True, exist_ok=True)
        return value
2 changes: 1 addition & 1 deletion src/boilercv/models/params.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Parameters for the data pipeline."""


from boilercore.models import SynchronizedPathsYamlModel
from pydantic import Field

from boilercv import PARAMS_FILE
from boilercv.models import SynchronizedPathsYamlModel
from boilercv.models.paths import LocalPaths, Paths, ProjectPaths

YAML_INDENT = 2
Expand Down
2 changes: 1 addition & 1 deletion src/boilercv/models/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from pathlib import Path

from boilercore.models import CreatePathsModel
from pydantic import DirectoryPath, FilePath

from boilercv import DATA_DIR, LOCAL_DATA, PROJECT_DIR
from boilercv.models import CreatePathsModel


def get_sorted_paths(path: Path) -> list[Path]:
Expand Down

0 comments on commit 75bacfe

Please sign in to comment.