From 35c07fee0a3263726cb39c52324b4ac3c728a345 Mon Sep 17 00:00:00 2001
From: Jakub Wlodek <jwlodek@bnl.gov>
Date: Thu, 19 Sep 2024 09:42:33 -0400
Subject: [PATCH] Include chunk shape as a parameter in stream resource for HDF
 dataset (#544)

* Adding record for num frames in chunk along with chunk_size field in desc

* Attributes are saved all in a single chunk

* Update tests to account for chunk_size datakey parameter

* Chunk size should be in sres not desc

* Move chunk size to sres parameters

* Refactor tests to reflect changes

* chunk size can be int or none

Co-authored-by: Eugene <ymatviych@bnl.gov>

* Update chunk size signal to non-zero in one of the AD test sets

* Use correct chunk size for PandA, make sure we use chunk size auto

* Add comment on chunk size

* Make chunk_size a tuple that explicitly describes all chunk dims

* Make sure chunk size is tuple even with one dim, update tests, simplify ad standard det tests

* Make chunk_size always tuple of int, default to empty tuple

* Use readback value to avoid disconnect between actual value and signal get

* Follow import convention for tests

* Make use of slicing for detector name in ad_standard_det_factory clearer

* Rename chunk size to chunk shape

* Add space for linting

* Fix test

* Fix merge conflict

* Simplifying ad standard det factory fixture

* Fix unawaited task issue

* kinetix fixture doesn't need to be async

---------

Co-authored-by: Eugene <ymatviych@bnl.gov>
---
 src/ophyd_async/core/_hdf_dataset.py        |  3 ++
 src/ophyd_async/epics/adcore/_core_io.py    |  2 ++
 src/ophyd_async/epics/adcore/_hdf_writer.py | 10 ++++++
 src/ophyd_async/fastcs/panda/_writer.py     |  6 +++-
 tests/epics/adaravis/test_aravis.py         | 29 ++++++---------
 tests/epics/adcore/test_writers.py          |  3 ++
 tests/epics/adkinetix/test_kinetix.py       | 32 +++++++----------
 tests/epics/advimba/test_vimba.py           | 40 ++++++++++-----------
 tests/epics/conftest.py                     | 38 ++++++++++++++++++++
 tests/fastcs/panda/test_hdf_panda.py        |  2 ++
 tests/fastcs/panda/test_writer.py           |  7 +++-
 11 files changed, 112 insertions(+), 60 deletions(-)
 create mode 100644 tests/epics/conftest.py

diff --git a/src/ophyd_async/core/_hdf_dataset.py b/src/ophyd_async/core/_hdf_dataset.py
index 5199eab8cf..79cb9c432a 100644
--- a/src/ophyd_async/core/_hdf_dataset.py
+++ b/src/ophyd_async/core/_hdf_dataset.py
@@ -20,6 +20,8 @@ class HDFDataset:
     dtype_numpy: str = ""
     multiplier: int = 1
     swmr: bool = False
+    # Represents explicit chunk size written to disk.
+    chunk_shape: tuple[int, ...] = ()
 
 
 SLICE_NAME = "AD_HDF5_SWMR_SLICE"
@@ -66,6 +68,7 @@ def __init__(
                     "dataset": ds.dataset,
                     "swmr": ds.swmr,
                     "multiplier": ds.multiplier,
+                    "chunk_shape": ds.chunk_shape,
                 },
                 uid=None,
                 validate=True,
diff --git a/src/ophyd_async/epics/adcore/_core_io.py b/src/ophyd_async/epics/adcore/_core_io.py
index f15d48cd2e..7968579117 100644
--- a/src/ophyd_async/epics/adcore/_core_io.py
+++ b/src/ophyd_async/epics/adcore/_core_io.py
@@ -135,4 +135,6 @@ def __init__(self, prefix: str, name="") -> None:
         self.array_size0 = epics_signal_r(int, prefix + "ArraySize0")
         self.array_size1 = epics_signal_r(int, prefix + "ArraySize1")
         self.create_directory = epics_signal_rw(int, prefix + "CreateDirectory")
+        self.num_frames_chunks = epics_signal_r(int, prefix + "NumFramesChunks_RBV")
+        self.chunk_size_auto = epics_signal_rw_rbv(bool, prefix + "ChunkSizeAuto")
         super().__init__(prefix, name)
diff --git a/src/ophyd_async/epics/adcore/_hdf_writer.py b/src/ophyd_async/epics/adcore/_hdf_writer.py
index b9034eded0..bfffa67b89 100644
--- a/src/ophyd_async/epics/adcore/_hdf_writer.py
+++ b/src/ophyd_async/epics/adcore/_hdf_writer.py
@@ -56,6 +56,9 @@ async def open(self, multiplier: int = 1) -> dict[str, DataKey]:
         # when directory path PV is processed.
         await self.hdf.create_directory.set(info.create_dir_depth)
 
+        # Make sure we are using chunk auto-sizing
+        await asyncio.gather(self.hdf.chunk_size_auto.set(True))
+
         await asyncio.gather(
             self.hdf.num_extra_dims.set(0),
             self.hdf.lazy_open.set(True),
@@ -84,6 +87,9 @@ async def open(self, multiplier: int = 1) -> dict[str, DataKey]:
         self._multiplier = multiplier
         outer_shape = (multiplier,) if multiplier > 1 else ()
 
+        # Determine number of frames that will be saved per HDF chunk
+        frames_per_chunk = await self.hdf.num_frames_chunks.get_value()
+
         # Add the main data
         self._datasets = [
             HDFDataset(
@@ -92,6 +98,7 @@ async def open(self, multiplier: int = 1) -> dict[str, DataKey]:
                 shape=detector_shape,
                 dtype_numpy=np_dtype,
                 multiplier=multiplier,
+                chunk_shape=(frames_per_chunk, *detector_shape),
             )
         ]
         # And all the scalar datasets
@@ -118,6 +125,9 @@ async def open(self, multiplier: int = 1) -> dict[str, DataKey]:
                             (),
                             np_datatype,
                             multiplier,
+                            # NDAttributes appear to always be configured with
+                            # this chunk size
+                            chunk_shape=(16384,),
                         )
                     )
 
diff --git a/src/ophyd_async/fastcs/panda/_writer.py b/src/ophyd_async/fastcs/panda/_writer.py
index 5a1fbe7063..100af8b10e 100644
--- a/src/ophyd_async/fastcs/panda/_writer.py
+++ b/src/ophyd_async/fastcs/panda/_writer.py
@@ -105,7 +105,11 @@ async def _update_datasets(self) -> None:
 
         capture_table = await self.panda_data_block.datasets.get_value()
         self._datasets = [
-            HDFDataset(dataset_name, "/" + dataset_name, [1], multiplier=1)
+            # TODO: Update chunk size to read signal once available in IOC
+            # Currently PandA IOC sets chunk size to 1024 points per chunk
+            HDFDataset(
+                dataset_name, "/" + dataset_name, [1], multiplier=1, chunk_shape=(1024,)
+            )
             for dataset_name in capture_table["name"]
         ]
 
diff --git a/tests/epics/adaravis/test_aravis.py b/tests/epics/adaravis/test_aravis.py
index 3c34fa49eb..341ad280ed 100644
--- a/tests/epics/adaravis/test_aravis.py
+++ b/tests/epics/adaravis/test_aravis.py
@@ -1,11 +1,9 @@
 import re
 
 import pytest
-from bluesky.run_engine import RunEngine
 
 from ophyd_async.core import (
     DetectorTrigger,
-    DeviceCollector,
     PathProvider,
     TriggerInfo,
     set_mock_value,
@@ -14,14 +12,8 @@
 
 
 @pytest.fixture
-async def test_adaravis(
-    RE: RunEngine,
-    static_path_provider: PathProvider,
-) -> adaravis.AravisDetector:
-    async with DeviceCollector(mock=True):
-        test_adaravis = adaravis.AravisDetector("ADARAVIS:", static_path_provider)
-
-    return test_adaravis
+def test_adaravis(ad_standard_det_factory) -> adaravis.AravisDetector:
+    return ad_standard_det_factory(adaravis.AravisDetector)
 
 
 @pytest.mark.parametrize("exposure_time", [0.0, 0.1, 1.0, 10.0, 100.0])
@@ -80,7 +72,7 @@ def test_gpio_pin_limited(test_adaravis: adaravis.AravisDetector):
 
 
 async def test_hints_from_hdf_writer(test_adaravis: adaravis.AravisDetector):
-    assert test_adaravis.hints == {"fields": ["test_adaravis"]}
+    assert test_adaravis.hints == {"fields": ["test_adaravis1"]}
 
 
 async def test_can_read(test_adaravis: adaravis.AravisDetector):
@@ -98,9 +90,9 @@ async def test_decribe_describes_writer_dataset(
     await test_adaravis.stage()
     await test_adaravis.prepare(one_shot_trigger_info)
     assert await test_adaravis.describe() == {
-        "test_adaravis": {
-            "source": "mock+ca://ADARAVIS:HDF1:FullFileName_RBV",
-            "shape": (0, 0),
+        "test_adaravis1": {
+            "source": "mock+ca://ARAVIS1:HDF1:FullFileName_RBV",
+            "shape": (10, 10),
             "dtype": "array",
             "dtype_numpy": "|i1",
             "external": "STREAM:",
@@ -125,12 +117,13 @@ async def test_can_collect(
     assert docs[0][0] == "stream_resource"
     stream_resource = docs[0][1]
     sr_uid = stream_resource["uid"]
-    assert stream_resource["data_key"] == "test_adaravis"
+    assert stream_resource["data_key"] == "test_adaravis1"
     assert stream_resource["uri"] == "file://localhost" + str(full_file_name)
     assert stream_resource["parameters"] == {
         "dataset": "/entry/data/data",
         "swmr": False,
         "multiplier": 1,
+        "chunk_shape": (1, 10, 10),
     }
     assert docs[1][0] == "stream_datum"
     stream_datum = docs[1][1]
@@ -148,9 +141,9 @@ async def test_can_decribe_collect(
     await test_adaravis.stage()
     await test_adaravis.prepare(one_shot_trigger_info)
     assert (await test_adaravis.describe_collect()) == {
-        "test_adaravis": {
-            "source": "mock+ca://ADARAVIS:HDF1:FullFileName_RBV",
-            "shape": (0, 0),
+        "test_adaravis1": {
+            "source": "mock+ca://ARAVIS1:HDF1:FullFileName_RBV",
+            "shape": (10, 10),
             "dtype": "array",
             "dtype_numpy": "|i1",
             "external": "STREAM:",
diff --git a/tests/epics/adcore/test_writers.py b/tests/epics/adcore/test_writers.py
index ec729d1052..af32f86667 100644
--- a/tests/epics/adcore/test_writers.py
+++ b/tests/epics/adcore/test_writers.py
@@ -46,6 +46,9 @@ async def hdf_writer_with_stats(
         hdf = adcore.NDFileHDFIO("HDF:")
         stats = adcore.NDPluginStatsIO("FOO:")
 
+    # Set number of frames per chunk to something reasonable
+    set_mock_value(hdf.num_frames_chunks, 2)
+
     return adcore.ADHDFWriter(
         hdf,
         static_path_provider,
diff --git a/tests/epics/adkinetix/test_kinetix.py b/tests/epics/adkinetix/test_kinetix.py
index a17be5e5b3..ae2f72462e 100644
--- a/tests/epics/adkinetix/test_kinetix.py
+++ b/tests/epics/adkinetix/test_kinetix.py
@@ -1,25 +1,18 @@
 import pytest
-from bluesky.run_engine import RunEngine
 
 from ophyd_async.core import (
     DetectorTrigger,
-    DeviceCollector,
     StaticPathProvider,
     set_mock_value,
 )
 from ophyd_async.core._detector import TriggerInfo
 from ophyd_async.epics import adkinetix
+from ophyd_async.epics.adkinetix._kinetix_io import KinetixTriggerMode
 
 
 @pytest.fixture
-async def test_adkinetix(
-    RE: RunEngine,
-    static_path_provider: StaticPathProvider,
-) -> adkinetix.KinetixDetector:
-    async with DeviceCollector(mock=True):
-        test_adkinetix = adkinetix.KinetixDetector("KINETIX:", static_path_provider)
-
-    return test_adkinetix
+def test_adkinetix(ad_standard_det_factory):
+    return ad_standard_det_factory(adkinetix.KinetixDetector)
 
 
 async def test_get_deadtime(
@@ -30,7 +23,7 @@ async def test_get_deadtime(
 
 
 async def test_trigger_modes(test_adkinetix: adkinetix.KinetixDetector):
-    set_mock_value(test_adkinetix.drv.trigger_mode, "Internal")
+    set_mock_value(test_adkinetix.drv.trigger_mode, KinetixTriggerMode.internal)
 
     async def setup_trigger_mode(trig_mode: DetectorTrigger):
         await test_adkinetix.controller.prepare(
@@ -58,7 +51,7 @@ async def setup_trigger_mode(trig_mode: DetectorTrigger):
 
 
 async def test_hints_from_hdf_writer(test_adkinetix: adkinetix.KinetixDetector):
-    assert test_adkinetix.hints == {"fields": ["test_adkinetix"]}
+    assert test_adkinetix.hints == {"fields": ["test_adkinetix1"]}
 
 
 async def test_can_read(test_adkinetix: adkinetix.KinetixDetector):
@@ -76,9 +69,9 @@ async def test_decribe_describes_writer_dataset(
     await test_adkinetix.stage()
     await test_adkinetix.prepare(one_shot_trigger_info)
     assert await test_adkinetix.describe() == {
-        "test_adkinetix": {
-            "source": "mock+ca://KINETIX:HDF1:FullFileName_RBV",
-            "shape": (0, 0),
+        "test_adkinetix1": {
+            "source": "mock+ca://KINETIX1:HDF1:FullFileName_RBV",
+            "shape": (10, 10),
             "dtype": "array",
             "dtype_numpy": "|i1",
             "external": "STREAM:",
@@ -103,12 +96,13 @@ async def test_can_collect(
     assert docs[0][0] == "stream_resource"
     stream_resource = docs[0][1]
     sr_uid = stream_resource["uid"]
-    assert stream_resource["data_key"] == "test_adkinetix"
+    assert stream_resource["data_key"] == "test_adkinetix1"
     assert stream_resource["uri"] == "file://localhost" + str(full_file_name)
     assert stream_resource["parameters"] == {
         "dataset": "/entry/data/data",
         "swmr": False,
         "multiplier": 1,
+        "chunk_shape": (1, 10, 10),
     }
     assert docs[1][0] == "stream_datum"
     stream_datum = docs[1][1]
@@ -126,9 +120,9 @@ async def test_can_decribe_collect(
     await test_adkinetix.stage()
     await test_adkinetix.prepare(one_shot_trigger_info)
     assert (await test_adkinetix.describe_collect()) == {
-        "test_adkinetix": {
-            "source": "mock+ca://KINETIX:HDF1:FullFileName_RBV",
-            "shape": (0, 0),
+        "test_adkinetix1": {
+            "source": "mock+ca://KINETIX1:HDF1:FullFileName_RBV",
+            "shape": (10, 10),
             "dtype": "array",
             "dtype_numpy": "|i1",
             "external": "STREAM:",
diff --git a/tests/epics/advimba/test_vimba.py b/tests/epics/advimba/test_vimba.py
index ec93cc07d3..a8990502c3 100644
--- a/tests/epics/advimba/test_vimba.py
+++ b/tests/epics/advimba/test_vimba.py
@@ -1,25 +1,22 @@
 import pytest
-from bluesky.run_engine import RunEngine
 
 from ophyd_async.core import (
     DetectorTrigger,
-    DeviceCollector,
     PathProvider,
     set_mock_value,
 )
 from ophyd_async.core._detector import TriggerInfo
 from ophyd_async.epics import advimba
+from ophyd_async.epics.advimba._vimba_io import (
+    VimbaExposeOutMode,
+    VimbaOnOff,
+    VimbaTriggerSource,
+)
 
 
 @pytest.fixture
-async def test_advimba(
-    RE: RunEngine,
-    static_path_provider: PathProvider,
-) -> advimba.VimbaDetector:
-    async with DeviceCollector(mock=True):
-        test_advimba = advimba.VimbaDetector("VIMBA:", static_path_provider)
-
-    return test_advimba
+def test_advimba(ad_standard_det_factory) -> advimba.VimbaDetector:
+    return ad_standard_det_factory(advimba.VimbaDetector)
 
 
 async def test_get_deadtime(
@@ -30,9 +27,9 @@ async def test_get_deadtime(
 
 
 async def test_arming_trig_modes(test_advimba: advimba.VimbaDetector):
-    set_mock_value(test_advimba.drv.trigger_source, "Freerun")
-    set_mock_value(test_advimba.drv.trigger_mode, "Off")
-    set_mock_value(test_advimba.drv.exposure_mode, "Timed")
+    set_mock_value(test_advimba.drv.trigger_source, VimbaTriggerSource.freerun)
+    set_mock_value(test_advimba.drv.trigger_mode, VimbaOnOff.off)
+    set_mock_value(test_advimba.drv.exposure_mode, VimbaExposeOutMode.timed)
 
     async def setup_trigger_mode(trig_mode: DetectorTrigger):
         await test_advimba.controller.prepare(TriggerInfo(number=1, trigger=trig_mode))
@@ -68,7 +65,7 @@ async def setup_trigger_mode(trig_mode: DetectorTrigger):
 
 
 async def test_hints_from_hdf_writer(test_advimba: advimba.VimbaDetector):
-    assert test_advimba.hints == {"fields": ["test_advimba"]}
+    assert test_advimba.hints == {"fields": ["test_advimba1"]}
 
 
 async def test_can_read(test_advimba: advimba.VimbaDetector):
@@ -86,9 +83,9 @@ async def test_decribe_describes_writer_dataset(
     await test_advimba.stage()
     await test_advimba.prepare(one_shot_trigger_info)
     assert await test_advimba.describe() == {
-        "test_advimba": {
-            "source": "mock+ca://VIMBA:HDF1:FullFileName_RBV",
-            "shape": (0, 0),
+        "test_advimba1": {
+            "source": "mock+ca://VIMBA1:HDF1:FullFileName_RBV",
+            "shape": (10, 10),
             "dtype": "array",
             "dtype_numpy": "|i1",
             "external": "STREAM:",
@@ -113,12 +110,13 @@ async def test_can_collect(
     assert docs[0][0] == "stream_resource"
     stream_resource = docs[0][1]
     sr_uid = stream_resource["uid"]
-    assert stream_resource["data_key"] == "test_advimba"
+    assert stream_resource["data_key"] == "test_advimba1"
     assert stream_resource["uri"] == "file://localhost" + str(full_file_name)
     assert stream_resource["parameters"] == {
         "dataset": "/entry/data/data",
         "swmr": False,
         "multiplier": 1,
+        "chunk_shape": (1, 10, 10),
     }
     assert docs[1][0] == "stream_datum"
     stream_datum = docs[1][1]
@@ -136,9 +134,9 @@ async def test_can_decribe_collect(
     await test_advimba.stage()
     await test_advimba.prepare(one_shot_trigger_info)
     assert (await test_advimba.describe_collect()) == {
-        "test_advimba": {
-            "source": "mock+ca://VIMBA:HDF1:FullFileName_RBV",
-            "shape": (0, 0),
+        "test_advimba1": {
+            "source": "mock+ca://VIMBA1:HDF1:FullFileName_RBV",
+            "shape": (10, 10),
             "dtype": "array",
             "dtype_numpy": "|i1",
             "external": "STREAM:",
diff --git a/tests/epics/conftest.py b/tests/epics/conftest.py
new file mode 100644
index 0000000000..6d8f331765
--- /dev/null
+++ b/tests/epics/conftest.py
@@ -0,0 +1,38 @@
+from collections.abc import Callable
+
+import pytest
+from bluesky.run_engine import RunEngine
+
+from ophyd_async.core._detector import StandardDetector
+from ophyd_async.core._device import DeviceCollector
+from ophyd_async.core._mock_signal_utils import set_mock_value
+
+
+@pytest.fixture
+def ad_standard_det_factory(
+    RE: RunEngine,
+    static_path_provider,
+) -> Callable:
+    def generate_ad_standard_det(
+        ad_standard_detector_class, number=1
+    ) -> StandardDetector:
+        # Dynamically generate a name based on the class of detector
+        detector_name = ad_standard_detector_class.__name__
+        if detector_name.endswith("Detector"):
+            detector_name = detector_name[: -len("Detector")]
+
+        with DeviceCollector(mock=True):
+            test_adstandard_det = ad_standard_detector_class(
+                f"{detector_name.upper()}{number}:",
+                static_path_provider,
+                name=f"test_ad{detector_name.lower()}{number}",
+            )
+
+        # Set number of frames per chunk and frame dimensions to something reasonable
+        set_mock_value(test_adstandard_det.hdf.num_frames_chunks, 1)
+        set_mock_value(test_adstandard_det.drv.array_size_x, 10)
+        set_mock_value(test_adstandard_det.drv.array_size_y, 10)
+
+        return test_adstandard_det
+
+    return generate_ad_standard_det
diff --git a/tests/fastcs/panda/test_hdf_panda.py b/tests/fastcs/panda/test_hdf_panda.py
index 7752a3263b..41f50d648d 100644
--- a/tests/fastcs/panda/test_hdf_panda.py
+++ b/tests/fastcs/panda/test_hdf_panda.py
@@ -168,6 +168,7 @@ def flying_plan():
                 "dataset": f"/{dataset_name}",
                 "swmr": False,
                 "multiplier": 1,
+                "chunk_shape": (1024,),
             },
         }
         assert "test-panda.h5" in stream_resource["uri"]
@@ -284,6 +285,7 @@ def flying_plan():
                 "dataset": f"/{dataset_name}",
                 "swmr": False,
                 "multiplier": 1,
+                "chunk_shape": (1024,),
             },
         }
         assert "test-panda.h5" in stream_resource["uri"]
diff --git a/tests/fastcs/panda/test_writer.py b/tests/fastcs/panda/test_writer.py
index 5905c3c0e2..e7298568d1 100644
--- a/tests/fastcs/panda/test_writer.py
+++ b/tests/fastcs/panda/test_writer.py
@@ -209,7 +209,12 @@ def assert_resource_document(name, resource_doc):
             "data_key": name,
             "mimetype": "application/x-hdf5",
             "uri": "file://localhost" + str(tmp_path / "mock_panda" / "data.h5"),
-            "parameters": {"dataset": f"/{name}", "swmr": False, "multiplier": 1},
+            "parameters": {
+                "dataset": f"/{name}",
+                "swmr": False,
+                "multiplier": 1,
+                "chunk_shape": (1024,),
+            },
         }
         assert "mock_panda/data.h5" in resource_doc["uri"]