Skip to content

Commit

Permalink
Apply autoformatting
Browse files Browse the repository at this point in the history
  • Loading branch information
schlunma committed Sep 27, 2024
1 parent 7119884 commit 4d449b7
Show file tree
Hide file tree
Showing 10 changed files with 1,288 additions and 1,148 deletions.
2 changes: 1 addition & 1 deletion doc/quickstart/find_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ Key Description Default value if not
recipe if default DRS is used)
``special_attr`` A special attribute in the filename No default
`ACCESS-ESM` raw data, it's related to
frquency of raw data
frequency of raw data
``sub_dataset`` Part of the ACCESS-ESM raw dataset No default
root, need to specify if you want to
use the cmoriser
Expand Down
105 changes: 55 additions & 50 deletions esmvalcore/_provenance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provenance module."""

import copy
import logging
import os
Expand All @@ -14,7 +15,7 @@

logger = logging.getLogger(__name__)

ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/'
ESMVALTOOL_URI_PREFIX = "https://www.esmvaltool.org/"


def create_namespace(provenance, namespace):
Expand All @@ -25,11 +26,12 @@ def create_namespace(provenance, namespace):
def get_esmvaltool_provenance():
"""Create an esmvaltool run activity."""
provenance = ProvDocument()
namespace = 'software'
namespace = "software"
create_namespace(provenance, namespace)
attributes = {} # TODO: add dependencies with versions here
activity = provenance.activity(namespace + ':esmvaltool==' + __version__,
other_attributes=attributes)
activity = provenance.activity(
namespace + ":esmvaltool==" + __version__, other_attributes=attributes
)

return activity

Expand All @@ -39,55 +41,57 @@ def get_esmvaltool_provenance():

def attribute_to_authors(entity, authors):
"""Attribute entity to authors."""
namespace = 'author'
namespace = "author"
create_namespace(entity.bundle, namespace)

for author in authors:
if isinstance(author, str):
# This happens if the config-references.yml file is not available
author = {'name': author}
author = {"name": author}
agent = entity.bundle.agent(
namespace + ':' + author['name'],
{'attribute:' + k: author[k]
for k in author if k != 'name'})
namespace + ":" + author["name"],
{"attribute:" + k: author[k] for k in author if k != "name"},
)
entity.wasAttributedTo(agent)


def attribute_to_projects(entity, projects):
"""Attribute entity to projects."""
namespace = 'project'
namespace = "project"
create_namespace(entity.bundle, namespace)

for project in projects:
agent = entity.bundle.agent(namespace + ':' + project)
agent = entity.bundle.agent(namespace + ":" + project)
entity.wasAttributedTo(agent)


def get_recipe_provenance(documentation, filename):
"""Create a provenance entity describing a recipe."""
provenance = ProvDocument()

for namespace in ('recipe', 'attribute'):
for namespace in ("recipe", "attribute"):
create_namespace(provenance, namespace)

entity = provenance.entity(
'recipe:{}'.format(filename), {
'attribute:description': documentation.get('description', ''),
'attribute:references': str(documentation.get('references', [])),
})
"recipe:{}".format(filename),
{
"attribute:description": documentation.get("description", ""),
"attribute:references": str(documentation.get("references", [])),
},
)

attribute_to_authors(entity, documentation.get('authors', []))
attribute_to_projects(entity, documentation.get('projects', []))
attribute_to_authors(entity, documentation.get("authors", []))
attribute_to_projects(entity, documentation.get("projects", []))

return entity


def get_task_provenance(task, recipe_entity):
"""Create a provenance activity describing a task."""
provenance = ProvDocument()
create_namespace(provenance, 'task')
create_namespace(provenance, "task")

activity = provenance.activity('task:' + task.name)
activity = provenance.activity("task:" + task.name)

trigger = recipe_entity
provenance.update(recipe_entity.bundle)
Expand All @@ -104,11 +108,9 @@ def get_task_provenance(task, recipe_entity):
class TrackedFile:
"""File with provenance tracking."""

def __init__(self,
filename,
attributes=None,
ancestors=None,
prov_filename=None):
def __init__(
self, filename, attributes=None, ancestors=None, prov_filename=None
):
"""Create an instance of a file with provenance tracking.
Arguments
Expand Down Expand Up @@ -147,11 +149,11 @@ def __repr__(self):

def __eq__(self, other):
"""Check if `other` equals `self`."""
return hasattr(other, 'filename') and self.filename == other.filename
return hasattr(other, "filename") and self.filename == other.filename

def __lt__(self, other):
"""Check if `self` should be sorted before `other`."""
return hasattr(other, 'filename') and self.filename < other.filename
return hasattr(other, "filename") and self.filename < other.filename

def __hash__(self):
"""Return a unique hash for the file."""
Expand All @@ -175,7 +177,7 @@ def filename(self):
@property
def provenance_file(self):
"""Filename of provenance."""
return os.path.splitext(self.filename)[0] + '_provenance.xml'
return os.path.splitext(self.filename)[0] + "_provenance.xml"

def initialize_provenance(self, activity):
"""Initialize the provenance document.
Expand All @@ -186,7 +188,8 @@ def initialize_provenance(self, activity):
"""
if self.provenance is not None:
raise ValueError(
"Provenance of {} already initialized".format(self))
"Provenance of {} already initialized".format(self)
)
self.provenance = ProvDocument()
self._initialize_namespaces()
self._initialize_activity(activity)
Expand All @@ -195,7 +198,7 @@ def initialize_provenance(self, activity):

def _initialize_namespaces(self):
"""Initialize the namespaces."""
for namespace in ('file', 'attribute', 'preprocessor', 'task'):
for namespace in ("file", "attribute", "preprocessor", "task"):
create_namespace(self.provenance, namespace)

def _initialize_activity(self, activity):
Expand All @@ -207,21 +210,22 @@ def _initialize_entity(self):
"""Initialize the entity representing the file."""
if self.attributes is None:
self.attributes = {}
if 'nc' in Path(self.filename).suffix:
with Dataset(self.filename, 'r') as dataset:
if "nc" in Path(self.filename).suffix:
with Dataset(self.filename, "r") as dataset:
for attr in dataset.ncattrs():
self.attributes[attr] = dataset.getncattr(attr)

attributes = {
'attribute:' + str(k).replace(' ', '_'): str(v)
"attribute:" + str(k).replace(" ", "_"): str(v)
for k, v in self.attributes.items()
if k not in ('authors', 'projects')
if k not in ("authors", "projects")
}
self.entity = self.provenance.entity(f'file:{self.filename}',
attributes)
self.entity = self.provenance.entity(
f"file:{self.filename}", attributes
)

attribute_to_authors(self.entity, self.attributes.get('authors', []))
attribute_to_projects(self.entity, self.attributes.get('projects', []))
attribute_to_authors(self.entity, self.attributes.get("authors", []))
attribute_to_projects(self.entity, self.attributes.get("projects", []))

def _initialize_ancestors(self, activity):
"""Register ancestor files for provenance tracking."""
Expand All @@ -247,24 +251,24 @@ def wasderivedfrom(self, other):

def _select_for_include(self):
attributes = {
'software': "Created with ESMValTool v{}".format(__version__),
"software": "Created with ESMValTool v{}".format(__version__),
}
if 'caption' in self.attributes:
attributes['caption'] = self.attributes['caption']
if "caption" in self.attributes:
attributes["caption"] = self.attributes["caption"]
return attributes

@staticmethod
def _include_provenance_nc(filename, attributes):
with Dataset(filename, 'a') as dataset:
with Dataset(filename, "a") as dataset:
for key, value in attributes.items():
setattr(dataset, key, value)

@staticmethod
def _include_provenance_png(filename, attributes):
pnginfo = PngInfo()
exif_tags = {
'caption': 'ImageDescription',
'software': 'Software',
"caption": "ImageDescription",
"software": "Software",
}
for key, value in attributes.items():
pnginfo.add_text(exif_tags.get(key, key), value, zip=True)
Expand All @@ -276,8 +280,8 @@ def _include_provenance(self):
attributes = self._select_for_include()

# Attach provenance to supported file types
ext = os.path.splitext(self.filename)[1].lstrip('.').lower()
write = getattr(self, '_include_provenance_' + ext, None)
ext = os.path.splitext(self.filename)[1].lstrip(".").lower()
write = getattr(self, "_include_provenance_" + ext, None)
if write:
write(self.filename, attributes)

Expand All @@ -288,17 +292,18 @@ def save_provenance(self):
namespaces=self.provenance.namespaces,
)
self._include_provenance()
with open(self.provenance_file, 'wb') as file:
with open(self.provenance_file, "wb") as file:
# Create file with correct permissions before saving.
self.provenance.serialize(file, format='xml')
self.provenance.serialize(file, format="xml")
self.activity = None
self.entity = None
self.provenance = None

def restore_provenance(self):
"""Import provenance information from a previously saved file."""
self.provenance = ProvDocument.deserialize(self.provenance_file,
format='xml')
self.provenance = ProvDocument.deserialize(
self.provenance_file, format="xml"
)
entity_uri = f"{ESMVALTOOL_URI_PREFIX}file{self.prov_filename}"
self.entity = self.provenance.get_record(entity_uri)[0]
# Find the associated activity
Expand Down
Loading

0 comments on commit 4d449b7

Please sign in to comment.