diff --git a/docs/user/explanations/data-model.rst b/docs/user/explanations/data-model.rst index 21fb3ec6..985ae497 100644 --- a/docs/user/explanations/data-model.rst +++ b/docs/user/explanations/data-model.rst @@ -390,7 +390,8 @@ Minimal nontrivial valid example: .. code-block:: python # 'resource' document - {'parameters': {}, + {'path_semantics': 'posix', + 'resource_kwargs': {}, 'resource_path': '/local/path/subdirectory/data_file', 'root': '/local/path/', 'run_start': '10bf6945-4afd-43ca-af36-6ad8f3540bcd', @@ -405,7 +406,8 @@ Typical example: {'spec': 'AD_HDF5', 'root': '/GPFS/DATA/Andor/', 'resource_path': '2020/01/03/8ff08ff9-a2bf-48c3-8ff3-dcac0f309d7d.h5', - 'parameters': {'frame_per_point': 10}, + 'resource_kwargs': {'frame_per_point': 10}, + 'path_semantics': 'posix', 'uid': '3b300e6f-b431-4750-a635-5630d15c81a8', 'run_start': '10bf6945-4afd-43ca-af36-6ad8f3540bcd'} @@ -481,7 +483,7 @@ Typical example: # 'Stream Resource' document {'data_key': 'detector_1', - 'spec': 'AD_HDF5', + 'mimetype': 'application/x-hdf5', 'root': '/GPFS/DATA/Andor/', 'resource_path': '2020/01/03/8ff08ff9-a2bf-48c3-8ff3-dcac0f309d7d.h5', 'parameters': {'frame_per_point': 1}, diff --git a/docs/user/explanations/external.rst b/docs/user/explanations/external.rst index 0c949afe..ac39f3a3 100644 --- a/docs/user/explanations/external.rst +++ b/docs/user/explanations/external.rst @@ -95,7 +95,8 @@ contains path-related details. 'spec': 'AD_HDF5', 'root': '/GPFS/DATA/Andor/', 'resource_path': '2020/01/03/8ff08ff9-a2bf-48c3-8ff3-dcac0f309d7d.h5', - 'parameters': {'frame_per_point': 10}, + 'resource_kwargs': {'frame_per_point': 10}, + 'path_semantics': 'posix', 'uid': '3b300e6f-b431-4750-a635-5630d15c81a8', 'run_start': '10bf6945-4afd-43ca-af36-6ad8f3540bcd'} @@ -105,9 +106,32 @@ context-dependent (depending on what system you are accessing the data from) and subject to change (if the data is moved over time). 
The ``spec`` gives us a hint about the format of this asset, whether it be a -file, multiple files, or something more specialized. The ``parameters`` +file, multiple files, or something more specialized. The ``resource_kwargs`` provide any additional parameters for reading it. + .. code:: python + + # 'Stream Resource' document + {'uid': 'aa10035d-1d2b-41d9-97e6-03e3fe62fa6c', + 'mimetype': 'application/x-hdf5', + 'root': '/GPFS/DATA/Andor/', + 'resource_path': '2020/01/03/8ff08ff9-a2bf-48c3-8ff3-dcac0f309d7d.h5', + 'parameters': {'frame_per_point': 10}, + 'data_key': 'detector_1', + 'run_start': '10bf6945-4afd-43ca-af36-6ad8f3540bcd'} + +The ``resource_path`` is a relative path, all of which is semantic and should +usually not change during the lifecycle of this asset. The ``root`` is more +context-dependent (depending on what system you are accessing the data from) +and subject to change (if the data is moved over time). + +The ``mimetype`` is a recognized standard way to specify the I/O procedures to +read the asset. It gives us a hint about the format of this asset, whether it +be a file, multiple files, or something more specialized. We support standard +mimetypes, such as ``image/tiff``, as well as custom ones, e.g. +``application/x-hdf5-smwr-slice``. The ``parameters`` provide any additional +parameters for reading the asset. + Handlers ======== @@ -120,7 +144,7 @@ Handler Interface A 'handler class' may be any callable with the signature:: - handler_class(full_path, **parameters) + handler_class(full_path, **resource_kwargs) It is expected to return an object, a 'handler instance', which is also callable and has the following signature:: @@ -134,8 +158,8 @@ typically implemented using a class that implements ``__init__`` and .. 
code:: python class MyHandler: - def __init__(self, path, **parameters): - # Consume the path information and the 'parameters' kwargs from the + def __init__(self, path, **resource_kwargs): + # Consume the path information and the 'resource_kwargs' from the # Resource. Typically stashes some state and/or opens file(s). ... @@ -150,7 +174,7 @@ But in general it may be any callable-that-returns-a-callable. .. code:: python - def handler(path, **parameters): + def handler(path, **resource_kwargs): def f(**datum_kwargs): return some_array_like return f diff --git a/event_model/__init__.py b/event_model/__init__.py index a71dbce2..3ce2f39c 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -538,7 +538,7 @@ class Filler(DocumentRouter): A 'handler class' may be any callable with the signature:: - handler_class(full_path, **parameters) + handler_class(full_path, **resource_kwargs) It is expected to return an object, a 'handler instance', which is also callable and has the following signature:: @@ -1041,7 +1041,7 @@ def get_handler(self, resource: Resource) -> Any: handler = _attempt_with_retries( func=handler_class, args=(resource_path,), - kwargs=resource["parameters"], + kwargs=resource["resource_kwargs"], intervals=[0] + self.retry_intervals, error_to_catch=IOError, error_to_raise=error_to_raise, @@ -1422,7 +1422,7 @@ class RunRouter(DocumentRouter): A 'handler class' may be any callable with the signature:: - handler_class(full_path, **parameters) + handler_class(full_path, **resource_kwargs) It is expected to return an object, a 'handler instance', which is also callable and has the following signature:: @@ -1899,6 +1899,13 @@ def __iter__(self) -> Iterator: ) +PATH_SEMANTICS: Dict[str, Literal["posix", "windows"]] = { + "posix": "posix", + "nt": "windows", +} +default_path_semantics: Literal["posix", "windows"] = PATH_SEMANTICS[os.name] + + @dataclass class ComposeResource: start: Optional[RunStart] @@ -1908,7 +1915,8 @@ def __call__( spec: str, 
root: str, resource_path: str, - parameters: Dict[str, Any], + resource_kwargs: Dict[str, Any], + path_semantics: Literal["posix", "windows"] = default_path_semantics, uid: Optional[str] = None, validate: bool = True, ) -> ComposeResourceBundle: @@ -1916,10 +1924,11 @@ def __call__( uid = str(uuid.uuid4()) doc = Resource( + path_semantics=path_semantics, uid=uid, spec=spec, root=root, - parameters=parameters, + resource_kwargs=resource_kwargs, resource_path=resource_path, ) @@ -1942,7 +1951,8 @@ def compose_resource( spec: str, root: str, resource_path: str, - parameters: Dict[str, Any], + resource_kwargs: Dict[str, Any], + path_semantics: Literal["posix", "windows"] = default_path_semantics, start: Optional[RunStart] = None, uid: Optional[str] = None, validate: bool = True, @@ -1954,7 +1964,8 @@ def compose_resource( spec, root, resource_path, - parameters, + resource_kwargs, + path_semantics=path_semantics, uid=uid, validate=validate, ) @@ -2036,7 +2047,7 @@ class ComposeStreamResource: def __call__( self, - spec: str, + mimetype: str, root: str, resource_path: str, data_key: str, @@ -2050,7 +2061,7 @@ def __call__( doc = StreamResource( uid=uid, data_key=data_key, - spec=spec, + mimetype=mimetype, root=root, resource_path=resource_path, parameters=parameters, @@ -2073,7 +2084,7 @@ def __call__( def compose_stream_resource( *, - spec: str, + mimetype: str, root: str, resource_path: str, data_key: str, @@ -2086,7 +2097,7 @@ def compose_stream_resource( Here for backwards compatibility, the Compose class is prefered. 
""" return ComposeStreamResource(start=start)( - spec, + mimetype, root, resource_path, data_key, diff --git a/event_model/documents/resource.py b/event_model/documents/resource.py index fb9227ba..882ceb69 100644 --- a/event_model/documents/resource.py +++ b/event_model/documents/resource.py @@ -1,6 +1,6 @@ from typing import Any, Dict -from typing_extensions import Annotated, NotRequired, TypedDict +from typing_extensions import Annotated, Literal, NotRequired, TypedDict from .generate.type_wrapper import Field, add_extra_schema @@ -18,10 +18,10 @@ class PartialResource(TypedDict): resource_path: Annotated[ str, Field(description="Filepath or URI for locating this resource") ] - parameters: Annotated[ + resource_kwargs: Annotated[ Dict[str, Any], Field( - description="Additional keyword arguments to pass to the Handler to read a Resource" + description="Additional argument to pass to the Handler to read a Resource" ), ] @@ -43,6 +43,12 @@ class Resource(PartialResource): externally-stored data """ + path_semantics: NotRequired[ + Annotated[ + Literal["posix", "windows"], + Field(description="Rules for joining paths"), + ] + ] run_start: NotRequired[ Annotated[ str, diff --git a/event_model/documents/stream_resource.py b/event_model/documents/stream_resource.py index 4dd1f03b..30c48067 100644 --- a/event_model/documents/stream_resource.py +++ b/event_model/documents/stream_resource.py @@ -46,7 +46,7 @@ class StreamResource(TypedDict): ), ] ] - spec: Annotated[ + mimetype: Annotated[ str, Field( description="String identifying the format/type of this Stream Resource, " diff --git a/event_model/schemas/stream_resource.json b/event_model/schemas/stream_resource.json index 43e8ac23..f2a64cfd 100644 --- a/event_model/schemas/stream_resource.json +++ b/event_model/schemas/stream_resource.json @@ -28,8 +28,8 @@ "description": "Globally unique ID to the run_start document this Stream Resource is associated with.", "type": "string" }, - "spec": { - "title": "Spec", + 
"mimetype": { + "title": "Mimetype", "description": "String identifying the format/type of this Stream Resource, used to identify a compatible Handler", "type": "string" }, @@ -44,7 +44,7 @@ "parameters", "resource_path", "root", - "spec", + "mimetype", "uid" ], "additionalProperties": false