From bc22a997c75649b064c020f732f64b9cbfaa1a18 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Wed, 21 Jun 2023 15:03:02 +0200 Subject: [PATCH 01/74] Fix some pytest deprecation warnings It wanted setup renamed to setup_method and teardown to teardown_method. --- tests/test_passthrough.py | 6 +++--- tests/test_recv.py | 10 +++++----- tests/test_send.py | 10 +++++----- tests/test_send_asyncio.py | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_passthrough.py b/tests/test_passthrough.py index 687965ea9..a8a6217b2 100644 --- a/tests/test_passthrough.py +++ b/tests/test_passthrough.py @@ -1,4 +1,4 @@ -# Copyright 2015, 2019-2022 National Research Foundation (SARAO) +# Copyright 2015, 2019-2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -508,7 +508,7 @@ def _interface_address(self): pytest.skip('Envar SPEAD2_TEST_IBV_INTERFACE_ADDRESS not set') return ifaddr - def setup(self): + def setup_method(self): # mlx5 drivers only enable multicast loopback if there are multiple # device contexts. The sender and receiver end up sharing one, so we # need to explicitly create another. @@ -516,7 +516,7 @@ def setup(self): pytest.skip('IBV support not compiled in') self._extra_context = spead2.IbvContext(self._interface_address()) - def teardown(self): + def teardown_method(self): self._extra_context.reset() def prepare_receivers(self, receivers): diff --git a/tests/test_recv.py b/tests/test_recv.py index caa2d7a7b..05214a70c 100644 --- a/tests/test_recv.py +++ b/tests/test_recv.py @@ -1,4 +1,4 @@ -# Copyright 2015, 2017, 2019-2022 National Research Foundation (SARAO) +# Copyright 2015, 2017, 2019-2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -209,7 +209,7 @@ def make_numpy_descriptor_from(self, id, name, description, array): class TestDecode: """Various types of descriptors must be correctly interpreted to decode data""" - def setup(self): + def setup_method(self): self.flavour = FLAVOUR def data_to_heaps(self, data, **kwargs): @@ -955,7 +955,7 @@ def test_deprecated_constants(self): class TestStream: """Tests for the stream API.""" - def setup(self): + def setup_method(self): self.flavour = FLAVOUR def test_full_stop(self): @@ -1167,7 +1167,7 @@ def test_illegal_udp_port(self): class TestTcpReader: - def setup(self): + def setup_method(self): self.receiver = recv.Stream(spead2.ThreadPool()) recv_sock = socket.socket() recv_sock.bind(("127.0.0.1", 0)) @@ -1179,7 +1179,7 @@ def setup(self): self.send_sock = socket.socket() self.send_sock.connect(("127.0.0.1", port)) - def teardown(self): + def teardown_method(self): self.close() def close(self): diff --git a/tests/test_send.py b/tests/test_send.py index 5a011f820..8742dcdf2 100644 --- a/tests/test_send.py +++ b/tests/test_send.py @@ -1,4 +1,4 @@ -# Copyright 2015, 2019-2021 National Research Foundation (SARAO) +# Copyright 2015, 2019-2021, 2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -111,7 +111,7 @@ def offset_generator(fields): class TestEncode: """Test heap encoding of various data""" - def setup(self): + def setup_method(self): self.flavour = Flavour(4, 
64, 48, 0) def test_empty(self): @@ -525,7 +525,7 @@ def test_bad_max_heaps(self): class TestStream: - def setup(self): + def setup_method(self): # A slow stream, so that we can test overflowing the queue self.flavour = Flavour(4, 64, 48, 0) self.stream = send.BytesStream( @@ -538,7 +538,7 @@ def setup(self): self.heap = ig.get_heap() self.threads = [] - def teardown(self): + def teardown_method(self): for thread in self.threads: thread.join() @@ -773,7 +773,7 @@ def test_failed_connect(self): class TestInprocStream: - def setup(self): + def setup_method(self): self.flavour = Flavour(4, 64, 48, 0) self.queue = spead2.InprocQueue() self.stream = send.InprocStream(spead2.ThreadPool(), [self.queue]) diff --git a/tests/test_send_asyncio.py b/tests/test_send_asyncio.py index 303e7180b..d8de79928 100644 --- a/tests/test_send_asyncio.py +++ b/tests/test_send_asyncio.py @@ -1,4 +1,4 @@ -# Copyright 2015, 2019-2022 National Research Foundation (SARAO) +# Copyright 2015, 2019-2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -29,7 +29,7 @@ @pytest.mark.asyncio class TestUdpStream: - def setup(self): + def setup_method(self): # Make a stream slow enough that we can test async interactions config = spead2.send.StreamConfig(rate=5e6) self.stream = UdpStream(spead2.ThreadPool(), [('localhost', 8888)], config) From feea31e377617bb580e0293ab00d1fa5453c3219 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 19 Jun 2023 13:52:08 +0200 Subject: [PATCH 02/74] Refactor chunk_stream_state The logic around acquiring and readying chunks is now handed off to a template parameter helper class. This will simplify plugging in a policy for stream groups later. --- include/spead2/recv_chunk_stream.h | 121 ++++++++++++++++++++--------- src/recv_chunk_stream.cpp | 88 ++++++++++++++------- 2 files changed, 142 insertions(+), 67 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 0983c2dfd..d43969b0b 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 National Research Foundation (SARAO) +/* Copyright 2021-2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,10 @@ namespace recv /// Storage for a chunk with metadata class chunk { +private: + /// Reference count for chunks belonging to stream groups + std::size_t ref_count = 0; + public: /// Chunk ID std::int64_t chunk_id = -1; @@ -195,30 +200,29 @@ class chunk_stream_config namespace detail { -class chunk_stream_allocator; +template class chunk_stream_allocator; -/** - * Base class that holds the internal state of @ref - * spead2::recv::chunk_stream. - * - * This is split into a separate class to avoid some initialisation ordering - * problems: it is constructed before the @ref spead2::recv::stream base class, - * allowing the latter to use function objects that reference this class. 
- */ -class chunk_stream_state +/// Parts of chunk_stream_state that don't depend on the chunk manager +class chunk_stream_state_base { -private: +protected: struct free_place_data { - void operator()(unsigned char *ptr); + void operator()(unsigned char *ptr) const; }; const packet_memcpy_function orig_memcpy; ///< Packet memcpy provided by the user const chunk_stream_config chunk_config; const std::uintptr_t stream_id; const std::size_t base_stat_index; ///< Index of first custom stat - /// Circular buffer of chunks under construction - std::vector> chunks; + + /** + * Circular buffer of chunks under construction. + * + * This class might or might not have exclusive ownership of the chunks, + * depending on the template parameter. + */ + std::vector chunks; std::int64_t head_chunk = 0, tail_chunk = 0; ///< chunk IDs of valid chunk range std::size_t head_pos = 0, tail_pos = 0; ///< Positions corresponding to @ref head and @ref tail in @ref chunks /** @@ -232,14 +236,12 @@ class chunk_stream_state void packet_memcpy(const spead2::memory_allocator::pointer &allocation, const packet_header &packet) const; - /// Send the oldest chunk to the ready callback - void flush_head(); - -protected: - std::int64_t get_head_chunk() const { return head_chunk; } - std::int64_t get_tail_chunk() const { return tail_chunk; } - public: + /// Constructor + chunk_stream_state_base( + const stream_config &config, + const chunk_stream_config &chunk_config); + /** * Structure associated with each heap, as the deleter of the * allocated pointer. @@ -255,12 +257,50 @@ class chunk_stream_state void operator()(std::uint8_t *) const {} }; - /// Constructor - chunk_stream_state(const stream_config &config, const chunk_stream_config &chunk_config); - /// Get the stream's chunk configuration const chunk_stream_config &get_chunk_config() const { return chunk_config; } + /** + * Get the @ref heap_metadata associated with a heap payload pointer. + * If the pointer was not allocated by a chunk stream, returns @c + * nullptr. + */ + static const heap_metadata *get_heap_metadata(const memory_allocator::pointer &ptr); +}; + +/** + * Base class that holds the internal state of @ref + * spead2::recv::chunk_stream. + * + * This is split into a separate class to avoid some initialisation ordering + * problems: it is constructed before the @ref spead2::recv::stream base class, + * allowing the latter to use function objects that reference this class. + * + * The template parameter allows the policy for allocating and releasing + * chunks to be customised. See @ref chunk_manager_simple for the API. + */ +template +class chunk_stream_state : public chunk_stream_state_base +{ +private: + using chunk_manager_t = CM; + friend chunk_manager_t; + + chunk_manager_t chunk_manager; + /// Send the oldest chunk to the ready callback + void flush_head(); + +protected: + std::int64_t get_head_chunk() const { return head_chunk; } + std::int64_t get_tail_chunk() const { return tail_chunk; } + +public: + /// Constructor + chunk_stream_state( + const stream_config &config, + const chunk_stream_config &chunk_config, + chunk_manager_t chunk_manager); + /// Compute the config to pass down to @ref spead2::recv::stream. stream_config adjust_config(const stream_config &config); @@ -275,13 +315,13 @@ class chunk_stream_state /// Send all in-flight chunks to the ready callback void flush_chunks(); +}; - /** - * Get the @ref heap_metadata associated with a heap payload pointer. - * If the pointer was not allocated by a chunk stream, returns @c - * nullptr. 
- */ - static const heap_metadata *get_heap_metadata(const memory_allocator::pointer &ptr); +class chunk_manager_simple +{ +public: + chunk *allocate_chunk(chunk_stream_state &state, std::int64_t chunk_id); + void ready_chunk(chunk_stream_state &state, chunk *c); }; /** @@ -289,30 +329,35 @@ class chunk_stream_state * * It forwards allocation requests to @ref chunk_stream_state. */ +template class chunk_stream_allocator final : public memory_allocator { private: - chunk_stream_state &stream; + chunk_stream_state &stream; public: - explicit chunk_stream_allocator(chunk_stream_state &stream); + explicit chunk_stream_allocator(chunk_stream_state &stream); virtual pointer allocate(std::size_t size, void *hint) override; }; +extern template class chunk_stream_state; +extern template class chunk_stream_allocator; + } // namespace detail /** * Stream that writes incoming heaps into chunks. */ -class chunk_stream : private detail::chunk_stream_state, public stream +class chunk_stream : private detail::chunk_stream_state, public stream { - friend class chunk_stream_state; + friend class detail::chunk_stream_state; + friend class detail::chunk_manager_simple; virtual void heap_ready(live_heap &&) override; public: - using heap_metadata = detail::chunk_stream_state::heap_metadata; + using heap_metadata = detail::chunk_stream_state_base::heap_metadata; /** * Constructor. @@ -346,8 +391,8 @@ class chunk_stream : private detail::chunk_stream_state, public stream const stream_config &config, const chunk_stream_config &chunk_config); - using detail::chunk_stream_state::get_chunk_config; - using detail::chunk_stream_state::get_heap_metadata; + using detail::chunk_stream_state::get_chunk_config; + using detail::chunk_stream_state::get_heap_metadata; virtual void stop_received() override; virtual void stop() override; diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp index 88d9662bf..225d346db 100644 --- a/src/recv_chunk_stream.cpp +++ b/src/recv_chunk_stream.cpp @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 National Research Foundation (SARAO) +/* Copyright 2021-2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -105,7 +105,7 @@ static std::size_t round_up(std::size_t size, std::size_t align) return (size + align - 1) / align * align; } -chunk_stream_state::chunk_stream_state( +chunk_stream_state_base::chunk_stream_state_base( const stream_config &config, const chunk_stream_config &chunk_config) : orig_memcpy(config.get_memcpy()), chunk_config(chunk_config), @@ -115,10 +115,6 @@ chunk_stream_state::chunk_stream_state( { if (!this->chunk_config.get_place()) throw std::invalid_argument("chunk_config.place is not set"); - if (!this->chunk_config.get_allocate()) - throw std::invalid_argument("chunk_config.allocate is not set"); - if (!this->chunk_config.get_ready()) - throw std::invalid_argument("chunk_config.ready is not set"); /* Compute the memory required for place_data_storage. The layout is * - chunk_place_data @@ -166,7 +162,7 @@ chunk_stream_state::chunk_stream_state( place_data_storage.reset(ptr); } -void chunk_stream_state::free_place_data::operator()(unsigned char *ptr) +void chunk_stream_state_base::free_place_data::operator()(unsigned char *ptr) const { // TODO: should this use std::launder in C++17? 
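    // (place_data was constructed in this buffer with placement new, so in
    // C++17 std::launder(reinterpret_cast<chunk_place_data *>(ptr)) would be
    // the strictly conforming spelling of the cast below.)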
auto *place_data = reinterpret_cast(ptr); @@ -174,7 +170,7 @@ void chunk_stream_state::free_place_data::operator()(unsigned char *ptr) operator delete[](ptr); } -void chunk_stream_state::packet_memcpy( +void chunk_stream_state_base::packet_memcpy( const memory_allocator::pointer &allocation, const packet_header &packet) const { @@ -196,13 +192,34 @@ void chunk_stream_state::packet_memcpy( } } -stream_config chunk_stream_state::adjust_config(const stream_config &config) +const chunk_stream_state_base::heap_metadata *chunk_stream_state_base::get_heap_metadata( + const memory_allocator::pointer &ptr) +{ + return ptr.get_deleter().target(); +} + +template +chunk_stream_state::chunk_stream_state( + const stream_config &config, + const chunk_stream_config &chunk_config, + chunk_manager_t chunk_manager) + : chunk_stream_state_base(config, chunk_config), + chunk_manager(std::move(chunk_manager)) +{ + if (!this->chunk_config.get_allocate()) + throw std::invalid_argument("chunk_config.allocate is not set"); + if (!this->chunk_config.get_ready()) + throw std::invalid_argument("chunk_config.ready is not set"); +} + +template +stream_config chunk_stream_state::adjust_config(const stream_config &config) { using namespace std::placeholders; stream_config new_config = config; // Unsized heaps won't work with the custom allocator new_config.set_allow_unsized_heaps(false); - new_config.set_memory_allocator(std::make_shared(*this)); + new_config.set_memory_allocator(std::make_shared>(*this)); // Override the original memcpy with our custom version new_config.set_memcpy(std::bind(&chunk_stream_state::packet_memcpy, this, _1, _2)); // Add custom statistics @@ -211,15 +228,14 @@ stream_config chunk_stream_state::adjust_config(const stream_config &config) return new_config; } -void chunk_stream_state::flush_head() +template +void chunk_stream_state::flush_head() { assert(head_chunk < tail_chunk); if (chunks[head_pos]) { - std::uint64_t *batch_stats = static_cast(this)->batch_stats.data(); - chunk_config.get_ready()(std::move(chunks[head_pos]), batch_stats); - // If the ready callback didn't take over ownership, free it. - chunks[head_pos].reset(); + chunk_manager.ready_chunk(*this, chunks[head_pos]); + chunks[head_pos] = nullptr; } head_chunk++; head_pos++; @@ -227,18 +243,13 @@ void chunk_stream_state::flush_head() head_pos = 0; // wrap around the circular buffer } -void chunk_stream_state::flush_chunks() +template +void chunk_stream_state::flush_chunks() { while (head_chunk != tail_chunk) flush_head(); } -const chunk_stream_state::heap_metadata *chunk_stream_state::get_heap_metadata( - const memory_allocator::pointer &ptr) -{ - return ptr.get_deleter().target(); -} - // Used to get a non-null pointer static std::uint8_t dummy_uint8; @@ -246,8 +257,9 @@ static std::uint8_t dummy_uint8; static constexpr std::size_t too_old_heaps_offset = 0; static constexpr std::size_t rejected_heaps_offset = 1; -std::pair -chunk_stream_state::allocate(std::size_t size, const packet_header &packet) +template +std::pair +chunk_stream_state::allocate(std::size_t size, const packet_header &packet) { /* Extract the user's requested items. * TODO: this could possibly be optimised with a hash table (with a @@ -307,7 +319,6 @@ chunk_stream_state::allocate(std::size_t size, const packet_header &packet) { // We've moved beyond the end of our current window, and need to // allocate fresh chunks. 
- const auto &allocate = chunk_config.get_allocate(); if (chunk_id >= tail_chunk + std::int64_t(max_chunks)) { /* We've jumped ahead so far that the entire current window @@ -323,7 +334,7 @@ chunk_stream_state::allocate(std::size_t size, const packet_header &packet) { if (std::size_t(tail_chunk - head_chunk) == max_chunks) flush_head(); - chunks[tail_pos] = allocate(tail_chunk, place_data->batch_stats); + chunks[tail_pos] = chunk_manager.allocate_chunk(*this, tail_chunk); if (chunks[tail_pos]) { chunks[tail_pos]->chunk_id = tail_chunk; @@ -365,12 +376,28 @@ chunk_stream_state::allocate(std::size_t size, const packet_header &packet) } } -chunk_stream_allocator::chunk_stream_allocator(chunk_stream_state &stream) +chunk *chunk_manager_simple::allocate_chunk(chunk_stream_state &state, std::int64_t chunk_id) +{ + const auto &allocate = state.chunk_config.get_allocate(); + std::unique_ptr owned = allocate(chunk_id, state.place_data->batch_stats); + return owned.release(); // ready_chunk will re-take ownership +} + +void chunk_manager_simple::ready_chunk(chunk_stream_state &state, chunk *c) +{ + std::uint64_t *batch_stats = static_cast(&state)->batch_stats.data(); + std::unique_ptr owned(c); + state.chunk_config.get_ready()(std::move(owned), batch_stats); +} + +template +chunk_stream_allocator::chunk_stream_allocator(chunk_stream_state &stream) : stream(stream) { } -memory_allocator::pointer chunk_stream_allocator::allocate(std::size_t size, void *hint) +template +memory_allocator::pointer chunk_stream_allocator::allocate(std::size_t size, void *hint) { if (hint) { @@ -382,13 +409,16 @@ memory_allocator::pointer chunk_stream_allocator::allocate(std::size_t size, voi return memory_allocator::allocate(size, hint); } +template class chunk_stream_state; +template class chunk_stream_allocator; + } // namespace detail chunk_stream::chunk_stream( io_service_ref io_service, const stream_config &config, const chunk_stream_config &chunk_config) - : chunk_stream_state(config, chunk_config), + : chunk_stream_state(config, chunk_config, detail::chunk_manager_simple()), stream(std::move(io_service), adjust_config(config)) { } From dfcdddbba2f9fe31033caaba3e3607fdc8b5f2c3 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 19 Jun 2023 14:16:20 +0200 Subject: [PATCH 03/74] Move template code into header file --- include/spead2/recv_chunk_stream.h | 203 +++++++++++++++++++++++++++++ src/recv_chunk_stream.cpp | 198 ---------------------------- 2 files changed, 203 insertions(+), 198 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index d43969b0b..d036ab4f2 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -341,6 +342,25 @@ class chunk_stream_allocator final : public memory_allocator virtual pointer allocate(std::size_t size, void *hint) override; }; +template +chunk_stream_allocator::chunk_stream_allocator(chunk_stream_state &stream) + : stream(stream) +{ +} + +template +memory_allocator::pointer chunk_stream_allocator::allocate(std::size_t size, void *hint) +{ + if (hint) + { + auto alloc = stream.allocate(size, *reinterpret_cast(hint)); + // Use the heap_metadata as the deleter + return pointer(alloc.first, std::move(alloc.second)); + } + // Probably unreachable, but provides a safety net + return memory_allocator::allocate(size, hint); +} + extern template class chunk_stream_state; extern template class chunk_stream_allocator; @@ -477,6 +497,189 
@@ class chunk_ring_stream : public chunk_stream virtual ~chunk_ring_stream(); }; +namespace detail +{ + +template +chunk_stream_state::chunk_stream_state( + const stream_config &config, + const chunk_stream_config &chunk_config, + chunk_manager_t chunk_manager) + : chunk_stream_state_base(config, chunk_config), + chunk_manager(std::move(chunk_manager)) +{ + if (!this->chunk_config.get_allocate()) + throw std::invalid_argument("chunk_config.allocate is not set"); + if (!this->chunk_config.get_ready()) + throw std::invalid_argument("chunk_config.ready is not set"); +} + +template +stream_config chunk_stream_state::adjust_config(const stream_config &config) +{ + using namespace std::placeholders; + stream_config new_config = config; + // Unsized heaps won't work with the custom allocator + new_config.set_allow_unsized_heaps(false); + new_config.set_memory_allocator(std::make_shared>(*this)); + // Override the original memcpy with our custom version + new_config.set_memcpy(std::bind(&chunk_stream_state::packet_memcpy, this, _1, _2)); + // Add custom statistics + new_config.add_stat("too_old_heaps"); + new_config.add_stat("rejected_heaps"); + return new_config; +} + +template +void chunk_stream_state::flush_head() +{ + assert(head_chunk < tail_chunk); + if (chunks[head_pos]) + { + chunk_manager.ready_chunk(*this, chunks[head_pos]); + chunks[head_pos] = nullptr; + } + head_chunk++; + head_pos++; + if (head_pos == chunks.size()) + head_pos = 0; // wrap around the circular buffer +} + +template +void chunk_stream_state::flush_chunks() +{ + while (head_chunk != tail_chunk) + flush_head(); +} + +template +std::pair +chunk_stream_state::allocate(std::size_t size, const packet_header &packet) +{ + // Used to get a non-null pointer + static std::uint8_t dummy_uint8; + + // Keep these in sync with stats added in adjust_config + static constexpr std::size_t too_old_heaps_offset = 0; + static constexpr std::size_t rejected_heaps_offset = 1; + + /* Extract the user's requested items. + * TODO: this could possibly be optimised with a hash table (with a + * perfect hash function chosen in advance), but for the expected + * sizes the overheads will probably outweight the benefits. + */ + const auto &item_ids = get_chunk_config().get_items(); + std::fill(place_data->items, place_data->items + item_ids.size(), -1); + pointer_decoder decoder(packet.heap_address_bits); + /* packet.pointers and packet.n_items skips initial "special" item + * pointers. To allow them to be matched as well, we start from the + * original packet and skip over the 8-byte header. + */ + for (const std::uint8_t *p = packet.packet + 8; p != packet.payload; p += sizeof(item_pointer_t)) + { + item_pointer_t pointer = load_be(p); + if (decoder.is_immediate(pointer)) + { + item_pointer_t id = decoder.get_id(pointer); + for (std::size_t j = 0; j < item_ids.size(); j++) + if (item_ids[j] == id) + place_data->items[j] = decoder.get_immediate(pointer); + } + } + + /* TODO: see if the storage can be in the class with the deleter + * just referencing it. That will avoid the implied memory allocation + * in constructing the std::function underlying the deleter. 
+ */ + std::pair out; + out.first = &dummy_uint8; // Use a non-null value to avoid confusion with empty pointers + heap_metadata &metadata = out.second; + + place_data->packet = packet.packet; + place_data->packet_size = packet.payload + packet.payload_length - packet.packet; + place_data->chunk_id = -1; + place_data->heap_index = 0; + place_data->heap_offset = 0; + place_data->batch_stats = static_cast(this)->batch_stats.data(); + place_data->extra_offset = 0; + place_data->extra_size = 0; + chunk_config.get_place()(place_data, sizeof(*place_data)); + auto chunk_id = place_data->chunk_id; + if (chunk_id < head_chunk) + { + // We don't want this heap. + metadata.chunk_id = -1; + metadata.chunk_ptr = nullptr; + std::size_t stat_offset = (chunk_id >= 0) ? too_old_heaps_offset : rejected_heaps_offset; + place_data->batch_stats[base_stat_index + stat_offset]++; + return out; + } + else + { + std::size_t max_chunks = chunk_config.get_max_chunks(); + if (chunk_id >= tail_chunk) + { + // We've moved beyond the end of our current window, and need to + // allocate fresh chunks. + if (chunk_id >= tail_chunk + std::int64_t(max_chunks)) + { + /* We've jumped ahead so far that the entire current window + * is stale. Flush it all and fast-forward to the new window. + * We leave it to the while loop below to actually allocate + * the chunks. + */ + flush_chunks(); + head_chunk = tail_chunk = chunk_id - (max_chunks - 1); + head_pos = tail_pos = 0; + } + while (chunk_id >= tail_chunk) + { + if (std::size_t(tail_chunk - head_chunk) == max_chunks) + flush_head(); + chunks[tail_pos] = chunk_manager.allocate_chunk(*this, tail_chunk); + if (chunks[tail_pos]) + { + chunks[tail_pos]->chunk_id = tail_chunk; + chunks[tail_pos]->stream_id = stream_id; + } + tail_chunk++; + tail_pos++; + if (tail_pos == max_chunks) + tail_pos = 0; // wrap around circular buffer + } + } + // Find position of chunk within the storage + std::size_t pos = chunk_id - head_chunk + head_pos; + if (pos >= max_chunks) + pos -= max_chunks; // wrap around the circular storage + if (chunks[pos]) + { + chunk &c = *chunks[pos]; + out.first = c.data.get() + place_data->heap_offset; + metadata.chunk_id = chunk_id; + metadata.heap_index = place_data->heap_index; + metadata.heap_offset = place_data->heap_offset; + metadata.chunk_ptr = &c; + if (place_data->extra_size > 0) + { + assert(place_data->extra_size <= chunk_config.get_max_heap_extra()); + assert(c.extra); + std::memcpy(c.extra.get() + place_data->extra_offset, place_data->extra, place_data->extra_size); + } + return out; + } + else + { + // the allocator didn't allocate a chunk for this slot. 
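+            // (A chunk_id of -1 is below any valid head chunk, so
+            // packet_memcpy will drop this heap's payload when it arrives.)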
+ metadata.chunk_id = -1; + metadata.chunk_ptr = nullptr; + return out; + } + } +} + +} // namespace detail + template chunk_ring_stream::chunk_ring_stream( io_service_ref io_service, diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp index 225d346db..b736683dd 100644 --- a/src/recv_chunk_stream.cpp +++ b/src/recv_chunk_stream.cpp @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -198,184 +197,6 @@ const chunk_stream_state_base::heap_metadata *chunk_stream_state_base::get_heap_ return ptr.get_deleter().target(); } -template -chunk_stream_state::chunk_stream_state( - const stream_config &config, - const chunk_stream_config &chunk_config, - chunk_manager_t chunk_manager) - : chunk_stream_state_base(config, chunk_config), - chunk_manager(std::move(chunk_manager)) -{ - if (!this->chunk_config.get_allocate()) - throw std::invalid_argument("chunk_config.allocate is not set"); - if (!this->chunk_config.get_ready()) - throw std::invalid_argument("chunk_config.ready is not set"); -} - -template -stream_config chunk_stream_state::adjust_config(const stream_config &config) -{ - using namespace std::placeholders; - stream_config new_config = config; - // Unsized heaps won't work with the custom allocator - new_config.set_allow_unsized_heaps(false); - new_config.set_memory_allocator(std::make_shared>(*this)); - // Override the original memcpy with our custom version - new_config.set_memcpy(std::bind(&chunk_stream_state::packet_memcpy, this, _1, _2)); - // Add custom statistics - new_config.add_stat("too_old_heaps"); - new_config.add_stat("rejected_heaps"); - return new_config; -} - -template -void chunk_stream_state::flush_head() -{ - assert(head_chunk < tail_chunk); - if (chunks[head_pos]) - { - chunk_manager.ready_chunk(*this, chunks[head_pos]); - chunks[head_pos] = nullptr; - } - head_chunk++; - head_pos++; - if (head_pos == chunks.size()) - head_pos = 0; // wrap around the circular buffer -} - -template -void chunk_stream_state::flush_chunks() -{ - while (head_chunk != tail_chunk) - flush_head(); -} - -// Used to get a non-null pointer -static std::uint8_t dummy_uint8; - -// Keep these in sync with stats added in adjust_config -static constexpr std::size_t too_old_heaps_offset = 0; -static constexpr std::size_t rejected_heaps_offset = 1; - -template -std::pair -chunk_stream_state::allocate(std::size_t size, const packet_header &packet) -{ - /* Extract the user's requested items. - * TODO: this could possibly be optimised with a hash table (with a - * perfect hash function chosen in advance), but for the expected - * sizes the overheads will probably outweight the benefits. - */ - const auto &item_ids = get_chunk_config().get_items(); - std::fill(place_data->items, place_data->items + item_ids.size(), -1); - pointer_decoder decoder(packet.heap_address_bits); - /* packet.pointers and packet.n_items skips initial "special" item - * pointers. To allow them to be matched as well, we start from the - * original packet and skip over the 8-byte header. - */ - for (const std::uint8_t *p = packet.packet + 8; p != packet.payload; p += sizeof(item_pointer_t)) - { - item_pointer_t pointer = load_be(p); - if (decoder.is_immediate(pointer)) - { - item_pointer_t id = decoder.get_id(pointer); - for (std::size_t j = 0; j < item_ids.size(); j++) - if (item_ids[j] == id) - place_data->items[j] = decoder.get_immediate(pointer); - } - } - - /* TODO: see if the storage can be in the class with the deleter - * just referencing it. 
That will avoid the implied memory allocation - * in constructing the std::function underlying the deleter. - */ - std::pair out; - out.first = &dummy_uint8; // Use a non-null value to avoid confusion with empty pointers - heap_metadata &metadata = out.second; - - place_data->packet = packet.packet; - place_data->packet_size = packet.payload + packet.payload_length - packet.packet; - place_data->chunk_id = -1; - place_data->heap_index = 0; - place_data->heap_offset = 0; - place_data->batch_stats = static_cast(this)->batch_stats.data(); - place_data->extra_offset = 0; - place_data->extra_size = 0; - chunk_config.get_place()(place_data, sizeof(*place_data)); - auto chunk_id = place_data->chunk_id; - if (chunk_id < head_chunk) - { - // We don't want this heap. - metadata.chunk_id = -1; - metadata.chunk_ptr = nullptr; - std::size_t stat_offset = (chunk_id >= 0) ? too_old_heaps_offset : rejected_heaps_offset; - place_data->batch_stats[base_stat_index + stat_offset]++; - return out; - } - else - { - std::size_t max_chunks = chunk_config.get_max_chunks(); - if (chunk_id >= tail_chunk) - { - // We've moved beyond the end of our current window, and need to - // allocate fresh chunks. - if (chunk_id >= tail_chunk + std::int64_t(max_chunks)) - { - /* We've jumped ahead so far that the entire current window - * is stale. Flush it all and fast-forward to the new window. - * We leave it to the while loop below to actually allocate - * the chunks. - */ - flush_chunks(); - head_chunk = tail_chunk = chunk_id - (max_chunks - 1); - head_pos = tail_pos = 0; - } - while (chunk_id >= tail_chunk) - { - if (std::size_t(tail_chunk - head_chunk) == max_chunks) - flush_head(); - chunks[tail_pos] = chunk_manager.allocate_chunk(*this, tail_chunk); - if (chunks[tail_pos]) - { - chunks[tail_pos]->chunk_id = tail_chunk; - chunks[tail_pos]->stream_id = stream_id; - } - tail_chunk++; - tail_pos++; - if (tail_pos == max_chunks) - tail_pos = 0; // wrap around circular buffer - } - } - // Find position of chunk within the storage - std::size_t pos = chunk_id - head_chunk + head_pos; - if (pos >= max_chunks) - pos -= max_chunks; // wrap around the circular storage - if (chunks[pos]) - { - chunk &c = *chunks[pos]; - out.first = c.data.get() + place_data->heap_offset; - metadata.chunk_id = chunk_id; - metadata.heap_index = place_data->heap_index; - metadata.heap_offset = place_data->heap_offset; - metadata.chunk_ptr = &c; - if (place_data->extra_size > 0) - { - assert(place_data->extra_size <= chunk_config.get_max_heap_extra()); - assert(c.extra); - std::memcpy(c.extra.get() + place_data->extra_offset, place_data->extra, place_data->extra_size); - } - return out; - } - else - { - // the allocator didn't allocate a chunk for this slot. 
- metadata.chunk_id = -1; - metadata.chunk_ptr = nullptr; - return out; - } - } -} - chunk *chunk_manager_simple::allocate_chunk(chunk_stream_state &state, std::int64_t chunk_id) { const auto &allocate = state.chunk_config.get_allocate(); @@ -390,25 +211,6 @@ void chunk_manager_simple::ready_chunk(chunk_stream_state state.chunk_config.get_ready()(std::move(owned), batch_stats); } -template -chunk_stream_allocator::chunk_stream_allocator(chunk_stream_state &stream) - : stream(stream) -{ -} - -template -memory_allocator::pointer chunk_stream_allocator::allocate(std::size_t size, void *hint) -{ - if (hint) - { - auto alloc = stream.allocate(size, *reinterpret_cast(hint)); - // Use the heap_metadata as the deleter - return pointer(alloc.first, std::move(alloc.second)); - } - // Probably unreachable, but provides a safety net - return memory_allocator::allocate(size, hint); -} - template class chunk_stream_state; template class chunk_stream_allocator; From 38e23dbb40bce7e4040fabe6ab85769c691cb78f Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 19 Jun 2023 15:01:57 +0200 Subject: [PATCH 04/74] Factor out chunk_window This will simplify reusing some logic for chunk_stream_group. --- include/spead2/recv_chunk_stream.h | 161 +++++++++++++++++++---------- src/recv_chunk_stream.cpp | 4 +- 2 files changed, 107 insertions(+), 58 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index d036ab4f2..91dcded08 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -21,6 +21,7 @@ #ifndef SPEAD2_RECV_CHUNK_STREAM #define SPEAD2_RECV_CHUNK_STREAM +#include #include #include #include @@ -201,6 +202,92 @@ class chunk_stream_config namespace detail { +/** + * Sliding window of chunk pointers. + */ +class chunk_window +{ +private: + /// Circular buffer of chunks under construction. + std::vector chunks; + std::int64_t head_chunk = 0, tail_chunk = 0; ///< chunk IDs of valid chunk range + std::size_t head_pos = 0, tail_pos = 0; ///< Positions corresponding to @ref head and @ref tail in @ref chunks + +public: + /// Send the oldest chunk to the ready callback + template + void flush_head(const F &ready_chunk) + { + assert(head_chunk < tail_chunk); + if (chunks[head_pos]) + { + ready_chunk(chunks[head_pos]); + chunks[head_pos] = nullptr; + } + head_chunk++; + head_pos++; + if (head_pos == chunks.size()) + head_pos = 0; // wrap around the circular buffer + } + + explicit chunk_window(std::size_t max_chunks); + + /** + * Obtain a pointer to a chunk with ID @a chunk_id. + * + * If @a chunk_id is behind the window, returns nullptr. If it is ahead of + * the window, the window is advanced using @a ready_chunk and @a allocate_chunk. + */ + template + chunk *get_chunk( + std::uint64_t chunk_id, std::uintptr_t stream_id, const F1 &allocate_chunk, const F2 &ready_chunk) + { + const std::size_t max_chunks = chunks.size(); + if (chunk_id >= head_chunk) + { + // We've moved beyond the end of our current window, and need to + // allocate fresh chunks. + if (chunk_id >= tail_chunk + std::int64_t(max_chunks)) + { + /* We've jumped ahead so far that the entire current window + * is stale. Flush it all and fast-forward to the new window. + * We leave it to the while loop below to actually allocate + * the chunks. 
+ */ + while (head_chunk != tail_chunk) + flush_head(ready_chunk); + head_chunk = tail_chunk = chunk_id - (max_chunks - 1); + head_pos = tail_pos = 0; + } + while (chunk_id >= tail_chunk) + { + if (std::size_t(tail_chunk - head_chunk) == max_chunks) + flush_head(ready_chunk); + chunks[tail_pos] = allocate_chunk(tail_chunk); + if (chunks[tail_pos]) + { + chunks[tail_pos]->chunk_id = tail_chunk; + chunks[tail_pos]->stream_id = stream_id; + } + tail_chunk++; + tail_pos++; + if (tail_pos == max_chunks) + tail_pos = 0; // wrap around circular buffer + } + // Find position of chunk within the storage + std::size_t pos = chunk_id - head_chunk + head_pos; + if (pos >= max_chunks) + pos -= max_chunks; // wrap around the circular storage + return chunks[pos]; + } + else + return nullptr; + } + + std::int64_t get_head_chunk() const { return head_chunk; } + std::int64_t get_tail_chunk() const { return tail_chunk; } +}; + template class chunk_stream_allocator; /// Parts of chunk_stream_state that don't depend on the chunk manager @@ -223,9 +310,8 @@ class chunk_stream_state_base * This class might or might not have exclusive ownership of the chunks, * depending on the template parameter. */ - std::vector chunks; - std::int64_t head_chunk = 0, tail_chunk = 0; ///< chunk IDs of valid chunk range - std::size_t head_pos = 0, tail_pos = 0; ///< Positions corresponding to @ref head and @ref tail in @ref chunks + chunk_window chunks; + /** * Scratch area for use by @ref allocate. This contains not just the @ref * chunk_place_data, but also the various arrays it points to. They're @@ -237,6 +323,10 @@ class chunk_stream_state_base void packet_memcpy(const spead2::memory_allocator::pointer &allocation, const packet_header &packet) const; +protected: + std::int64_t get_head_chunk() const { return chunks.get_head_chunk(); } + std::int64_t get_tail_chunk() const { return chunks.get_tail_chunk(); } + public: /// Constructor chunk_stream_state_base( @@ -291,10 +381,6 @@ class chunk_stream_state : public chunk_stream_state_base /// Send the oldest chunk to the ready callback void flush_head(); -protected: - std::int64_t get_head_chunk() const { return head_chunk; } - std::int64_t get_tail_chunk() const { return tail_chunk; } - public: /// Constructor chunk_stream_state( @@ -533,22 +619,13 @@ stream_config chunk_stream_state::adjust_config(const stream_config &config) template void chunk_stream_state::flush_head() { - assert(head_chunk < tail_chunk); - if (chunks[head_pos]) - { - chunk_manager.ready_chunk(*this, chunks[head_pos]); - chunks[head_pos] = nullptr; - } - head_chunk++; - head_pos++; - if (head_pos == chunks.size()) - head_pos = 0; // wrap around the circular buffer + chunks.flush_head([this](chunk *c) { chunk_manager.ready_chunk(*this, c); }); } template void chunk_stream_state::flush_chunks() { - while (head_chunk != tail_chunk) + while (get_head_chunk() != get_tail_chunk()) flush_head(); } @@ -605,7 +682,7 @@ chunk_stream_state::allocate(std::size_t size, const packet_header &packet) place_data->extra_size = 0; chunk_config.get_place()(place_data, sizeof(*place_data)); auto chunk_id = place_data->chunk_id; - if (chunk_id < head_chunk) + if (chunk_id < get_head_chunk()) { // We don't want this heap. 
metadata.chunk_id = -1; @@ -616,45 +693,15 @@ chunk_stream_state::allocate(std::size_t size, const packet_header &packet) } else { - std::size_t max_chunks = chunk_config.get_max_chunks(); - if (chunk_id >= tail_chunk) - { - // We've moved beyond the end of our current window, and need to - // allocate fresh chunks. - if (chunk_id >= tail_chunk + std::int64_t(max_chunks)) - { - /* We've jumped ahead so far that the entire current window - * is stale. Flush it all and fast-forward to the new window. - * We leave it to the while loop below to actually allocate - * the chunks. - */ - flush_chunks(); - head_chunk = tail_chunk = chunk_id - (max_chunks - 1); - head_pos = tail_pos = 0; - } - while (chunk_id >= tail_chunk) - { - if (std::size_t(tail_chunk - head_chunk) == max_chunks) - flush_head(); - chunks[tail_pos] = chunk_manager.allocate_chunk(*this, tail_chunk); - if (chunks[tail_pos]) - { - chunks[tail_pos]->chunk_id = tail_chunk; - chunks[tail_pos]->stream_id = stream_id; - } - tail_chunk++; - tail_pos++; - if (tail_pos == max_chunks) - tail_pos = 0; // wrap around circular buffer - } - } - // Find position of chunk within the storage - std::size_t pos = chunk_id - head_chunk + head_pos; - if (pos >= max_chunks) - pos -= max_chunks; // wrap around the circular storage - if (chunks[pos]) + chunk *chunk_ptr = chunks.get_chunk( + chunk_id, + stream_id, + [this](std::int64_t chunk_id) { return chunk_manager.allocate_chunk(*this, chunk_id); }, + [this](chunk *c) { chunk_manager.ready_chunk(*this, c); } + ); + if (chunk_ptr) { - chunk &c = *chunks[pos]; + chunk &c = *chunk_ptr; out.first = c.data.get() + place_data->heap_offset; metadata.chunk_id = chunk_id; metadata.heap_index = place_data->heap_index; diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp index b736683dd..2c6d376f4 100644 --- a/src/recv_chunk_stream.cpp +++ b/src/recv_chunk_stream.cpp @@ -104,6 +104,8 @@ static std::size_t round_up(std::size_t size, std::size_t align) return (size + align - 1) / align * align; } +chunk_window::chunk_window(std::size_t max_chunks) : chunks(max_chunks) {} + chunk_stream_state_base::chunk_stream_state_base( const stream_config &config, const chunk_stream_config &chunk_config) : orig_memcpy(config.get_memcpy()), @@ -174,7 +176,7 @@ void chunk_stream_state_base::packet_memcpy( const packet_header &packet) const { const heap_metadata &metadata = *get_heap_metadata(allocation); - if (metadata.chunk_id < head_chunk) + if (metadata.chunk_id < get_head_chunk()) { // The packet corresponds to a chunk that has already been aged out // TODO: increment a counter / log a warning From 1c5c176cce4a7fa6dcd09177d4f24f9d22eb9d01 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 19 Jun 2023 16:47:23 +0200 Subject: [PATCH 05/74] Work in progress on chunk stream groups It still needs some mechanisms to unblock streams that aren't receiving data, and it's failing unit tests. 
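As a concrete illustration of the intended sharing protocol, here is a
stand-alone sketch (hypothetical toy_* names; the real entry points are
chunk_stream_group::get_chunk and chunk_stream_group::release_chunk):

    #include <cstddef>
    #include <iostream>
    #include <mutex>

    struct toy_chunk
    {
        std::size_t ref_count = 0;   // touched only with the group mutex held
    };

    class toy_group
    {
    private:
        std::mutex mutex;
        toy_chunk slot;              // stands in for the chunk window

    public:
        // cf. chunk_stream_group::get_chunk: one reference per using stream
        toy_chunk *get()
        {
            std::lock_guard<std::mutex> lock(mutex);
            slot.ref_count++;
            return &slot;
        }

        // cf. chunk_stream_group::release_chunk: the last stream to release
        // the chunk is the one that readies it
        void release(toy_chunk *c)
        {
            std::lock_guard<std::mutex> lock(mutex);
            if (--c->ref_count == 0)
                std::cout << "chunk ready\n";  // real code calls the ready callback
        }
    };

    int main()
    {
        toy_group group;
        toy_chunk *a = group.get();
        toy_chunk *b = group.get();  // a second stream shares the same chunk
        group.release(a);            // chunk still held by the other stream
        group.release(b);            // prints "chunk ready"
    }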
--- include/Makefile.am | 3 +- include/spead2/recv_chunk_stream.h | 26 +-- include/spead2/recv_chunk_stream_group.h | 198 +++++++++++++++++++++++ src/Makefile.am | 3 +- src/recv_chunk_stream.cpp | 49 ++++-- src/recv_chunk_stream_group.cpp | 162 +++++++++++++++++++ 6 files changed, 414 insertions(+), 27 deletions(-) create mode 100644 include/spead2/recv_chunk_stream_group.h create mode 100644 src/recv_chunk_stream_group.cpp diff --git a/include/Makefile.am b/include/Makefile.am index b4b3134c6..c29387e98 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,4 +1,4 @@ -# Copyright 2016-2021 National Research Foundation (SARAO) +# Copyright 2016-2021, 2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -37,6 +37,7 @@ nobase_include_HEADERS = \ spead2/common_unbounded_queue.h \ spead2/portable_endian.h \ spead2/recv_chunk_stream.h \ + spead2/recv_chunk_stream_group.h \ spead2/recv_heap.h \ spead2/recv_inproc.h \ spead2/recv_live_heap.h \ diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 91dcded08..878e59990 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -44,8 +44,14 @@ namespace recv /// Storage for a chunk with metadata class chunk { + friend class chunk_stream_group; private: - /// Reference count for chunks belonging to stream groups + /** + * Reference count for chunks belonging to stream groups. + * + * This must only be manipulated from a single thread at a time e.g. + * with the group's mutex locked. + */ std::size_t ref_count = 0; public: @@ -323,6 +329,9 @@ class chunk_stream_state_base void packet_memcpy(const spead2::memory_allocator::pointer &allocation, const packet_header &packet) const; + /// Implementation of @ref stream::heap_ready + void do_heap_ready(live_heap &&lh); + protected: std::int64_t get_head_chunk() const { return chunks.get_head_chunk(); } std::int64_t get_tail_chunk() const { return chunks.get_tail_chunk(); } @@ -407,6 +416,9 @@ class chunk_stream_state : public chunk_stream_state_base class chunk_manager_simple { public: + explicit chunk_manager_simple(const chunk_stream_config &chunk_config); + + std::uint64_t *get_batch_stats(chunk_stream_state &state) const; chunk *allocate_chunk(chunk_stream_state &state, std::int64_t chunk_id); void ready_chunk(chunk_stream_state &state, chunk *c); }; @@ -489,7 +501,7 @@ class chunk_stream : private detail::chunk_stream_state::get_chunk_config; - using detail::chunk_stream_state::get_heap_metadata; + using detail::chunk_stream_state_base::get_chunk_config; + using detail::chunk_stream_state_base::get_heap_metadata; virtual void stop_received() override; virtual void stop() override; @@ -594,10 +606,6 @@ chunk_stream_state::chunk_stream_state( : chunk_stream_state_base(config, chunk_config), chunk_manager(std::move(chunk_manager)) { - if (!this->chunk_config.get_allocate()) - throw std::invalid_argument("chunk_config.allocate is not set"); - if (!this->chunk_config.get_ready()) - throw std::invalid_argument("chunk_config.ready is not set"); } template @@ -677,7 +685,7 @@ chunk_stream_state::allocate(std::size_t size, const packet_header &packet) place_data->chunk_id = -1; place_data->heap_index = 0; place_data->heap_offset = 0; - place_data->batch_stats = static_cast(this)->batch_stats.data(); + place_data->batch_stats = chunk_manager.get_batch_stats(*this); 
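+    // (The indirection via the chunk manager exists because only the manager
+    // knows which concrete stream class, chunk_stream or
+    // chunk_stream_group_member, actually owns the batch statistics.)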
place_data->extra_offset = 0; place_data->extra_size = 0; chunk_config.get_place()(place_data, sizeof(*place_data)); diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h new file mode 100644 index 000000000..5d506652e --- /dev/null +++ b/include/spead2/recv_chunk_stream_group.h @@ -0,0 +1,198 @@ +/* Copyright 2023 National Research Foundation (SARAO) + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + */ + +/** + * @file + */ + +#ifndef SPEAD2_RECV_CHUNK_STREAM_GROUP +#define SPEAD2_RECV_CHUNK_STREAM_GROUP + +#include +#include +#include +#include +#include +#include + +namespace spead2 +{ +namespace recv +{ + +/// Configuration for chunk_stream_group +class chunk_stream_group_config +{ +public: + /// Default value for @ref set_max_chunks + static constexpr std::size_t default_max_chunks = chunk_stream_config::default_max_chunks; + +private: + std::size_t max_chunks = default_max_chunks; + chunk_allocate_function allocate; + chunk_ready_function ready; + +public: + /** + * Set the maximum number of chunks that can be live at the same time. + * A value of 1 means that heaps must be received in order: once a + * chunk is started, no heaps from a previous chunk will be accepted. + * + * @throw std::invalid_argument if @a max_chunks is 0. + */ + chunk_stream_group_config &set_max_chunks(std::size_t max_chunks); + /// Return the maximum number of chunks that can be live at the same time. + std::size_t get_max_chunks() const { return max_chunks; } + + /// Set the function used to allocate a chunk. + chunk_stream_group_config &set_allocate(chunk_allocate_function allocate); + /// Get the function used to allocate a chunk. + const chunk_allocate_function &get_allocate() const { return allocate; } + + /// Set the function that is provided with completed chunks. + chunk_stream_group_config &set_ready(chunk_ready_function ready); + /// Get the function that is provided with completed chunks. + const chunk_ready_function &get_ready() const { return ready; } +}; + +class chunk_stream_group; + +namespace detail +{ + +class chunk_manager_group +{ +private: + chunk_stream_group &group; + +public: + explicit chunk_manager_group(chunk_stream_group &group); + + std::uint64_t *get_batch_stats(chunk_stream_state &state) const; + chunk *allocate_chunk(chunk_stream_state &state, std::int64_t chunk_id); + void ready_chunk(chunk_stream_state &state, chunk *c); +}; + +} // namespace detail + +/** + * A holder for a collection of streams that share chunks. + * + * @todo write more documentation here + */ +class chunk_stream_group +{ +private: + friend class detail::chunk_manager_group; + + const chunk_stream_group_config config; + + std::mutex mutex; // Protects all the mutable state + + /** + * Circular buffer of chunks under construction. 
+     *
+     * Ownership of the chunks is shared between the group and the member
+     * streams, but reference counting is manual (rather than using
+     * std::shared_ptr) so that the reference count can be embedded in the
+     * object, and to facilitate code sharing with @ref chunk_stream.
+     */
+    detail::chunk_window chunks;
+
+    /**
+     * Obtain the chunk with a given ID.
+     *
+     * This will shift the window if the chunk_id is beyond the tail. If the
+     * chunk is too old, it will return @c nullptr. The reference count of the
+     * returned chunk will be incremented.
+     *
+     * This function is thread-safe.
+     */
+    chunk *get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats);
+
+    /**
+     * Decrement chunk reference count.
+     *
+     * If the reference count reaches zero, the chunk is passed to the ready
+     * callback.
+     *
+     * This function is thread-safe.
+     */
+    void release_chunk(chunk *c, std::uint64_t *batch_stats);
+
+    /// Version of release_chunk that does not take the lock
+    void release_chunk_unlocked(chunk *c, std::uint64_t *batch_stats);
+
+public:
+    chunk_stream_group(const chunk_stream_group_config &config);
+    ~chunk_stream_group();
+
+    /**
+     * Release all chunks. This function is thread-safe.
+     */
+    void flush_chunks();
+};
+
+/**
+ * A single stream within a group managed by @ref chunk_stream_group.
+ */
+class chunk_stream_group_member : private detail::chunk_stream_state<detail::chunk_manager_group>,
+                                  public stream
+{
+    friend class detail::chunk_manager_group;
+
+    virtual void heap_ready(live_heap &&) override;
+
+public:
+    using heap_metadata = detail::chunk_stream_state_base::heap_metadata;
+
+    /**
+     * Constructor.
+     *
+     * This class passes a modified @a config to the base class constructor.
+     * See @ref chunk_stream for more information.
+     *
+     * The @link chunk_stream_config::set_allocate allocate@endlink and
+     * @link chunk_stream_config::set_ready ready@endlink callbacks are
+     * ignored, and the group's callbacks are used instead.
+     *
+     * Instances of this class must not outlive the group.
+     *
+     * @param io_service       I/O service (also used by the readers).
+     * @param config           Basic stream configuration
+     * @param chunk_config     Configuration for chunking
+     * @param group            Group to which this stream belongs
+     *
+     * @throw invalid_argument if the place function pointer in @a chunk_config
+     * has not been set.
+ */ + chunk_stream_group_member( + io_service_ref io_service, + const stream_config &config, + const chunk_stream_config &chunk_config, + chunk_stream_group &group); + + using detail::chunk_stream_state_base::get_chunk_config; + using detail::chunk_stream_state_base::get_heap_metadata; + + virtual void stop_received() override; + virtual void stop() override; + virtual ~chunk_stream_group_member() override; +}; + +} // namespace recv +} // namespace spead2 + +#endif // SPEAD2_RECV_CHUNK_STREAM_GROUP diff --git a/src/Makefile.am b/src/Makefile.am index 64b0d0512..2690f8593 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -# Copyright 2016, 2020, 2022 National Research Foundation (SARAO) +# Copyright 2016, 2020-2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -76,6 +76,7 @@ libspead2_a_SOURCES = \ common_socket.cpp \ common_thread_pool.cpp \ recv_chunk_stream.cpp \ + recv_chunk_stream_group.cpp \ recv_heap.cpp \ recv_inproc.cpp \ recv_live_heap.cpp \ diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp index 2c6d376f4..237f28ec8 100644 --- a/src/recv_chunk_stream.cpp +++ b/src/recv_chunk_stream.cpp @@ -193,12 +193,42 @@ void chunk_stream_state_base::packet_memcpy( } } +void chunk_stream_state_base::do_heap_ready(live_heap &&lh) +{ + if (lh.is_complete()) + { + heap h(std::move(lh)); + auto metadata = get_heap_metadata(h.get_payload()); + // We need to check the chunk_id because the chunk might have been aged + // out while the heap was incomplete. + if (metadata && metadata->chunk_ptr && metadata->chunk_id >= get_head_chunk() + && !get_chunk_config().get_packet_presence_payload_size()) + { + assert(metadata->heap_index < metadata->chunk_ptr->present_size); + metadata->chunk_ptr->present[metadata->heap_index] = true; + } + } +} + const chunk_stream_state_base::heap_metadata *chunk_stream_state_base::get_heap_metadata( const memory_allocator::pointer &ptr) { return ptr.get_deleter().target(); } +chunk_manager_simple::chunk_manager_simple(const chunk_stream_config &chunk_config) +{ + if (!chunk_config.get_allocate()) + throw std::invalid_argument("chunk_config.allocate is not set"); + if (!chunk_config.get_ready()) + throw std::invalid_argument("chunk_config.ready is not set"); +} + +std::uint64_t *chunk_manager_simple::get_batch_stats(chunk_stream_state &state) const +{ + return static_cast(&state)->batch_stats.data(); +} + chunk *chunk_manager_simple::allocate_chunk(chunk_stream_state &state, std::int64_t chunk_id) { const auto &allocate = state.chunk_config.get_allocate(); @@ -208,9 +238,8 @@ chunk *chunk_manager_simple::allocate_chunk(chunk_stream_state &state, chunk *c) { - std::uint64_t *batch_stats = static_cast(&state)->batch_stats.data(); std::unique_ptr owned(c); - state.chunk_config.get_ready()(std::move(owned), batch_stats); + state.chunk_config.get_ready()(std::move(owned), get_batch_stats(state)); } template class chunk_stream_state; @@ -222,26 +251,14 @@ chunk_stream::chunk_stream( io_service_ref io_service, const stream_config &config, const chunk_stream_config &chunk_config) - : chunk_stream_state(config, chunk_config, detail::chunk_manager_simple()), + : chunk_stream_state(config, chunk_config, detail::chunk_manager_simple(chunk_config)), stream(std::move(io_service), adjust_config(config)) { } void chunk_stream::heap_ready(live_heap &&lh) { - if (lh.is_complete()) - { - heap 
h(std::move(lh));
-        auto metadata = get_heap_metadata(h.get_payload());
-        // We need to check the chunk_id because the chunk might have been aged
-        // out while the heap was incomplete.
-        if (metadata && metadata->chunk_ptr && metadata->chunk_id >= get_head_chunk()
-            && !get_chunk_config().get_packet_presence_payload_size())
-        {
-            assert(metadata->heap_index < metadata->chunk_ptr->present_size);
-            metadata->chunk_ptr->present[metadata->heap_index] = true;
-        }
-    }
+    do_heap_ready(std::move(lh));
 }
 
 void chunk_stream::stop_received()
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
new file mode 100644
index 000000000..ff24c1c12
--- /dev/null
+++ b/src/recv_chunk_stream_group.cpp
@@ -0,0 +1,162 @@
+/* Copyright 2023 National Research Foundation (SARAO)
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ */
+
+#include <stdexcept>
+#include <spead2/recv_chunk_stream_group.h>
+
+namespace spead2
+{
+namespace recv
+{
+
+chunk_stream_group_config &chunk_stream_group_config::set_max_chunks(std::size_t max_chunks)
+{
+    if (max_chunks == 0)
+        throw std::invalid_argument("max_chunks cannot be 0");
+    this->max_chunks = max_chunks;
+    return *this;
+}
+
+chunk_stream_group_config &chunk_stream_group_config::set_allocate(chunk_allocate_function allocate)
+{
+    this->allocate = std::move(allocate);
+    return *this;
+}
+
+chunk_stream_group_config &chunk_stream_group_config::set_ready(chunk_ready_function ready)
+{
+    this->ready = std::move(ready);
+    return *this;
+}
+
+namespace detail
+{
+
+chunk_manager_group::chunk_manager_group(chunk_stream_group &group)
+    : group(group)
+{
+}
+
+std::uint64_t *chunk_manager_group::get_batch_stats(chunk_stream_state<chunk_manager_group> &state) const
+{
+    return static_cast<chunk_stream_group_member *>(&state)->batch_stats.data();
+}
+
+chunk *chunk_manager_group::allocate_chunk(
+    chunk_stream_state<chunk_manager_group> &state, std::int64_t chunk_id)
+{
+    return group.get_chunk(chunk_id, state.stream_id, state.place_data->batch_stats);
+}
+
+void chunk_manager_group::ready_chunk(chunk_stream_state<chunk_manager_group> &state, chunk *c)
+{
+    std::uint64_t *batch_stats = static_cast<chunk_stream_group_member *>(&state)->batch_stats.data();
+    group.release_chunk(c, batch_stats);
+}
+
+} // namespace detail
+
+chunk_stream_group::chunk_stream_group(const chunk_stream_group_config &config)
+    : config(config), chunks(config.get_max_chunks())
+{
+}
+
+chunk_stream_group::~chunk_stream_group()
+{
+    flush_chunks();
+}
+
+void chunk_stream_group::flush_chunks()
+{
+    std::lock_guard<std::mutex> lock(mutex);
+    while (chunks.get_head_chunk() != chunks.get_tail_chunk())
+        chunks.flush_head([this](chunk *c) { release_chunk_unlocked(c, nullptr); });
+}
+
+chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats)
+{
+    std::lock_guard<std::mutex> lock(mutex);
+    chunk *c = chunks.get_chunk(
+        chunk_id,
+        stream_id,
+        [this, batch_stats](std::int64_t id) {
+            return config.get_allocate()(id, batch_stats).release();
+        },
+        [this, batch_stats](chunk *c) { release_chunk_unlocked(c, batch_stats); }
+    );
+    if (c)
+        c->ref_count++;
+    return c;
+}
+
+void chunk_stream_group::release_chunk_unlocked(chunk *c, std::uint64_t *batch_stats)
+{
+    // Caller must already hold mutex; release_chunk is the locking wrapper.
+    if (--c->ref_count == 0)
+    {
+        std::unique_ptr<chunk> owned(c);
+        config.get_ready()(std::move(owned), batch_stats);
+    }
+}
+
+void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats)
+{
+    std::lock_guard<std::mutex> lock(mutex);
+    release_chunk_unlocked(c, batch_stats);
+}
+
+
+chunk_stream_group_member::chunk_stream_group_member(
+    io_service_ref io_service,
+    const stream_config &config,
+    const chunk_stream_config &chunk_config,
+    chunk_stream_group &group)
+    : chunk_stream_state(config, chunk_config, detail::chunk_manager_group(group)),
+    stream(std::move(io_service), adjust_config(config))
+{
+}
+
+void chunk_stream_group_member::heap_ready(live_heap &&lh)
+{
+    do_heap_ready(std::move(lh));
+}
+
+void chunk_stream_group_member::stop_received()
+{
+    stream::stop_received();
+    flush_chunks();
+}
+
+void chunk_stream_group_member::stop()
+{
+    {
+        std::lock_guard<std::mutex> lock(queue_mutex);
+        flush_chunks();
+    }
+    stream::stop();
+}
+
+chunk_stream_group_member::~chunk_stream_group_member()
+{
+    stop();
+}
+
+} // namespace recv
+} // namespace spead2

From 3fe8ccf28caad253c13977f10a69e29ee802f46a Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Tue, 20 Jun 2023 17:27:57 +0200
Subject: [PATCH 06/74] WIP on shutdown rewrite

This is an attempt to simplify the reader shutdown path. Instead of
stream::stop blocking until the readers signal that their completion
handlers have all run, try to make it safe for the handlers to run
after the stream has shut down.

To do this, the queue_mutex is moved inside an object managed by
shared_ptr, so that the handlers can keep it alive even after the
stream is destroyed. This does end up requiring creation and
destruction of a shared_ptr for every handler invocation, which isn't
ideal; perhaps there is some way to transfer it down the handler chain
(custom executor?).

This also means that readers no longer need a specific `stop` member
function; they just stop things in their destructors, generally
automatically.
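As a standalone sketch of the idea (not code from this patch; the names
fake_stream and handler are invented for illustration): completion
handlers capture the shared_ptr rather than the stream, lock the mutex
it owns, and treat a null back-pointer as "the stream has stopped".

    #include <iostream>
    #include <memory>
    #include <mutex>

    struct fake_stream
    {
        struct shared_state
        {
            std::mutex queue_mutex;
            fake_stream *self;      // cleared when the stream stops
            explicit shared_state(fake_stream *s) : self(s) {}
        };

        std::shared_ptr<shared_state> shared{new shared_state(this)};

        ~fake_stream() { stop(); }

        void stop()
        {
            std::lock_guard<std::mutex> lock(shared->queue_mutex);
            shared->self = nullptr; // later handler invocations become no-ops
        }

        void add_packet() { std::cout << "packet added\n"; }
    };

    // What a completion handler does: it owns the shared_state, not the
    // stream, so it remains safe to run after the stream is gone.
    void handler(const std::shared_ptr<fake_stream::shared_state> &state)
    {
        std::lock_guard<std::mutex> lock(state->queue_mutex);
        if (state->self)
            state->self->add_packet();
        else
            std::cout << "stream stopped; handler is a no-op\n";
    }

    int main()
    {
        auto stream = std::make_unique<fake_stream>();
        auto state = stream->shared;  // captured when the work was queued
        handler(state);               // stream alive: adds the packet
        stream.reset();               // stream stops and is destroyed
        handler(state);               // still safe: the mutex outlives it
    }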
--- include/spead2/common_thread_pool.h | 1 - include/spead2/recv_chunk_stream.h | 2 +- include/spead2/recv_inproc.h | 9 +- include/spead2/recv_mem.h | 5 +- include/spead2/recv_reader.h | 92 +------------ include/spead2/recv_stream.h | 195 +++++++++++++++++++++++----- include/spead2/recv_tcp.h | 21 ++- include/spead2/recv_udp.h | 14 +- include/spead2/recv_udp_base.h | 3 +- include/spead2/recv_udp_ibv.h | 85 ++++++------ include/spead2/recv_udp_ibv_mprq.h | 3 +- include/spead2/recv_udp_pcap.h | 6 +- src/Makefile.am | 1 - src/recv_chunk_stream.cpp | 2 +- src/recv_chunk_stream_group.cpp | 2 +- src/recv_inproc.cpp | 45 ++----- src/recv_mem.cpp | 11 +- src/recv_reader.cpp | 53 -------- src/recv_stream.cpp | 93 +++++++------ src/recv_tcp.cpp | 53 +++----- src/recv_udp.cpp | 64 ++++----- src/recv_udp_base.cpp | 3 +- src/recv_udp_ibv.cpp | 14 +- src/recv_udp_pcap.cpp | 16 +-- 24 files changed, 353 insertions(+), 440 deletions(-) delete mode 100644 src/recv_reader.cpp diff --git a/include/spead2/common_thread_pool.h b/include/spead2/common_thread_pool.h index be6938e6d..1a03974c8 100644 --- a/include/spead2/common_thread_pool.h +++ b/include/spead2/common_thread_pool.h @@ -28,7 +28,6 @@ #include #include #include -#include namespace spead2 { diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 878e59990..e52df9033 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -828,7 +828,7 @@ void chunk_ring_stream::stop() data_ring->stop(); // NB: NOT remove_producer as that might not break a deadlock chunk_stream::stop(); { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(shared->queue_mutex); graveyard.clear(); // free chunks that didn't make it into data_ring } } diff --git a/include/spead2/recv_inproc.h b/include/spead2/recv_inproc.h index b98d2e1f6..4a05d39aa 100644 --- a/include/spead2/recv_inproc.h +++ b/include/spead2/recv_inproc.h @@ -1,4 +1,4 @@ -/* Copyright 2018 National Research Foundation (SARAO) +/* Copyright 2018, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -24,7 +24,6 @@ #include #include #include -#include #include namespace spead2 @@ -43,7 +42,10 @@ class inproc_reader : public reader void process_one_packet(stream_base::add_packet_state &state, const inproc_queue::packet &packet); - void packet_handler(const boost::system::error_code &error, std::size_t bytes_received); + void packet_handler( + stream_base::add_packet_state &state, + const boost::system::error_code &error, + std::size_t bytes_received); void enqueue(); public: @@ -52,7 +54,6 @@ class inproc_reader : public reader stream &owner, std::shared_ptr queue); - virtual void stop() override; virtual bool lossy() const override; }; diff --git a/include/spead2/recv_mem.h b/include/spead2/recv_mem.h index d3bb1f287..03c84d633 100644 --- a/include/spead2/recv_mem.h +++ b/include/spead2/recv_mem.h @@ -1,4 +1,4 @@ -/* Copyright 2015 National Research Foundation (SARAO) +/* Copyright 2015, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -22,7 +22,7 @@ #define SPEAD2_RECV_MEM_H #include -#include +#include namespace spead2 { @@ -51,7 +51,6 @@ class mem_reader : public reader mem_reader(stream &owner, const std::uint8_t *ptr, std::size_t 
length); - virtual void stop() override {} virtual bool lossy() const override; }; diff --git a/include/spead2/recv_reader.h b/include/spead2/recv_reader.h index a03dd7608..22259d0a8 100644 --- a/include/spead2/recv_reader.h +++ b/include/spead2/recv_reader.h @@ -1,4 +1,4 @@ -/* Copyright 2015, 2019 National Research Foundation (SARAO) +/* Copyright 2015, 2019, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -16,96 +16,14 @@ /** * @file + * + * This file exists purely for backwards compatibility. The code has moved to + * recv_stream.h. */ #ifndef SPEAD2_RECV_READER_H #define SPEAD2_RECV_READER_H -#include -#include -#include - -namespace spead2 -{ -namespace recv -{ - -class stream; -class stream_base; - -/** - * Abstract base class for asynchronously reading data and passing it into - * a stream. Subclasses will usually override @ref stop. - * - * The lifecycle of a reader is: - * - construction - * - @ref stop (called with @ref stream_base::queue_mutex held) - * - destruction - * - * All of the above occur with @ref stream::reader_mutex held. - * - * Once the reader has completed its work (whether because @ref stop was called or - * because of network input), it must call @ref stopped to indicate that - * it can be safely destroyed. - */ -class reader -{ -private: - stream &owner; ///< Owning stream - -protected: - /// Called by last completion handler - void stopped(); - -public: - explicit reader(stream &owner) : owner(owner) {} - virtual ~reader() = default; - - /// Retrieve the wrapped stream - stream &get_stream() const { return owner; } - - /** - * Retrieve the wrapped stream's base class. This is normally only used - * to construct a @ref stream_base::add_packet_state. - */ - stream_base &get_stream_base() const; - - /// Retrieve the @c io_service corresponding to the owner - boost::asio::io_service &get_io_service(); - - /** - * Cancel any pending asynchronous operations. This is called with the - * owner's queue_mutex and reader_mutex held. This function does not need - * to wait for completion handlers to run, but it must schedule a call to - * @ref stopped. - */ - virtual void stop() = 0; - - /** - * Whether the reader risks losing data if it is not given a chance to - * run (true by default). This is used to control whether a warning - * should be given when the consumer is applying back-pressure. - */ - virtual bool lossy() const; -}; - -/** - * Factory for creating a new reader. This is used by @ref - * stream::emplace_reader to create the reader. The default implementation - * simply chains to the constructor, but it can be overloaded in cases where - * it is desirable to select the class dynamically. - */ -template -struct reader_factory -{ - template - static std::unique_ptr make_reader(Args&&... 
args) - { - return std::unique_ptr(new Reader(std::forward(args)...)); - } -}; - -} // namespace recv -} // namespace spead2 +#include #endif // SPEAD2_RECV_READER_H diff --git a/include/spead2/recv_stream.h b/include/spead2/recv_stream.h index e8dd4cbc5..9306196a9 100644 --- a/include/spead2/recv_stream.h +++ b/include/spead2/recv_stream.h @@ -1,4 +1,4 @@ -/* Copyright 2015, 2017-2021 National Research Foundation (SARAO) +/* Copyright 2015, 2017-2021, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -53,6 +52,7 @@ namespace recv { struct packet_header; +class stream; /// Registration information about a statistic counter. class stream_stat_config @@ -456,6 +456,8 @@ class stream_config std::size_t next_stat_index() const { return stats->size(); } }; +class stream_base; + /** * Encapsulation of a SPEAD stream. Packets are fed in through @ref add_packet. * The base class does nothing with heaps; subclasses will typically override @@ -501,8 +503,8 @@ class stream_config * Avoiding deadlocks requires a careful design with several mutexes. It's * governed by the requirement that @ref heap_ready may block indefinitely, and * this must not block other functions. Thus, several mutexes are involved: - * - @ref queue_mutex: protects values only used by @ref add_packet. This - * may be locked for long periods. + * - @ref shared_state::queue_mutex: protects values only used + * by @ref add_packet. This may be locked for long periods. * - @ref stats_mutex: protects stream statistics, and is mostly locked for * writes (assuming the user is only occasionally checking the stats). * @@ -513,6 +515,7 @@ class stream_config */ class stream_base { + friend class reader; public: struct add_packet_state; @@ -561,17 +564,35 @@ class stream_base const stream_config config; protected: + struct shared_state + { + /** + * Mutex protecting the state of the queue. This includes + * - @ref queue_storage + * - @ref buckets + * - @ref head + * - @ref stopped + * + * Subclasses may use it to protect additional state. It is guaranteed to + * be locked when @ref heap_ready is called. + */ + mutable std::mutex queue_mutex; + + /** + * Pointer back to the owning stream. This is set to @c nullptr + * when the stream is stopped. + */ + stream_base *self; + + explicit shared_state(stream_base *self) : self(self) {} + }; + /** - * Mutex protecting the state of the queue. This includes - * - @ref queue_storage - * - @ref buckets - * - @ref head - * - @ref stopped - * - * Subclasses may use it to protect additional state. It is guaranteed to - * be locked when @ref heap_ready is called. + * State that is indirectly held via @c std::shared_ptr. The indirection + * allows readers to have access to the mutex in a way that won't + * unexpectedly vanish from under them. */ - mutable std::mutex queue_mutex; + std::shared_ptr shared; private: /// @ref stop_received has been called, either externally or by stream control @@ -596,15 +617,16 @@ class stream_base /** * Callback called when a heap is being ejected from the live list. - * The heap might or might not be complete. The @ref queue_mutex will be + * The heap might or might not be complete. The + * @ref shared_state::queue_mutex will be * locked during this call, which will block @ref stop and @ref flush. 
     */
     virtual void heap_ready(live_heap &&) {}
 
-    /// Implementation of @ref flush that assumes the caller has locked @ref queue_mutex
+    /// Implementation of @ref flush that assumes the caller has locked @ref shared_state::queue_mutex
     void flush_unlocked();
 
-    /// Implementation of @ref stop that assumes the caller has locked @ref queue_mutex
+    /// Implementation of @ref stop that assumes the caller has locked @ref shared_state::queue_mutex
     void stop_unlocked();
 
     /// Implementation of @ref add_packet_state::add_packet
@@ -632,20 +654,29 @@
      * It is undefined what happens if @ref add_packet is called after a stream
      * is stopped.
      *
-     * This is called with @ref queue_mutex locked. Users must not call this
-     * function themselves; instead, call @ref stop.
+     * This is called with @ref shared_state::queue_mutex
+     * locked. Users must not call this function themselves; instead, call @ref
+     * stop.
      */
     virtual void stop_received();
 
 public:
     /**
      * State for a batch of calls to @ref add_packet. Constructing this object
-     * locks the stream's @ref queue_mutex.
+     * locks the stream's @ref shared_state::queue_mutex.
+     *
+     * After constructing this object, one *must* check whether @ref owner is
+     * null. If so, do not call any methods except for @ref stop and
+     * @ref is_stopped.
+     *
+     * While this object is alive, one must also keep alive a
+     * @c std::shared_ptr to the @ref shared_state.
      */
     struct add_packet_state
     {
-        stream_base &owner;
-        std::lock_guard<std::mutex> lock;  ///< Holds a lock on the owner's @ref queue_mutex
+        /// Holds a lock on the owner's @ref shared_state::queue_mutex
+        std::lock_guard<std::mutex> lock;
+        stream_base *owner;
 
         // Updates to the statistics
         std::uint64_t packets = 0;
@@ -654,12 +685,13 @@
         std::uint64_t single_packet_heaps = 0;
         std::uint64_t search_dist = 0;
 
-        explicit add_packet_state(stream_base &owner);
+        explicit add_packet_state(shared_state &owner);
+        explicit add_packet_state(stream_base &s) : add_packet_state(*s.shared) {}
         ~add_packet_state();
 
-        bool is_stopped() const { return owner.stopped; }
+        bool is_stopped() const { return owner == nullptr || owner->stopped; }
         /// Indicate that the stream has stopped (e.g. because the remote peer disconnected)
-        void stop() { owner.stop_unlocked(); }
+        void stop() { if (owner) owner->stop_unlocked(); }
         /**
          * Add a packet that was received, and which has been examined by @ref
          * decode_packet, and returns @c true if it is consumed. Even though @ref
         * decode_packet does some basic sanity-checking, it may still be rejected
         * by @ref live_heap::add_packet e.g., because it is a duplicate.
         *
         * It is an error to call this after the stream has been stopped.
+         *
+         * Calling this function may cause the readers to be destroyed,
+         * including the reader that is calling this function.
         */
-        bool add_packet(const packet_header &packet) { return owner.add_packet(*this, packet); }
+        bool add_packet(const packet_header &packet)
+        {
+            assert(!is_stopped());
+            return owner->add_packet(*this, packet);
+        }
     };
 
     /**
@@ -696,6 +735,92 @@
     stream_stats get_stats() const;
 };
 
+/**
+ * Abstract base class for asynchronously reading data and passing it into
+ * a stream.
+ *
+ * The lifecycle of a reader is:
+ * - the reader mutex is taken
+ * - construction
+ * - the queue mutex is taken
+ * - the stream stops
+ * - the reader mutex is taken
+ * - destruction
+ * - the stream is destroyed
+ *
+ * Destruction must ensure that any pending asynchronous operations are
Since destruction may happen on a separate thread to the one + * running in-flight handlers, care must be taken not to access the stream or + * the reader after the stream is stopped. In many cases this can be + * facilitated using @ref bind_handler. + */ +class reader +{ +private: + boost::asio::io_service &io_service; + std::shared_ptr owner; ///< Access to owning stream + +protected: + template + class bound_handler + { + private: + std::shared_ptr owner; + T orig; + + public: + template + bound_handler(std::shared_ptr owner, U &&orig) + : owner(std::move(owner)), orig(std::forward(orig)) + { + } + + template + void operator()(Args&&... args) + { + stream_base::add_packet_state state(*owner); + if (!state.is_stopped()) + orig(state, std::forward(args)...); + } + }; + + template + bound_handler bind_handler(T &&handler) const + { + return bound_handler::type>(owner, std::forward(handler)); + } + +public: + explicit reader(stream &owner); + virtual ~reader() = default; + + /// Retrieve the @c io_service corresponding to the owner + boost::asio::io_service &get_io_service() { return io_service; } + + /** + * Whether the reader risks losing data if it is not given a chance to + * run (true by default). This is used to control whether a warning + * should be given when the consumer is applying back-pressure. + */ + virtual bool lossy() const; +}; + +/** + * Factory for creating a new reader. This is used by @ref + * stream::emplace_reader to create the reader. The default implementation + * simply chains to the constructor, but it can be overloaded in cases where + * it is desirable to select the class dynamically. + */ +template +struct reader_factory +{ + template + static std::unique_ptr make_reader(Args&&... args) + { + return std::unique_ptr(new Reader(std::forward(args)...)); + } +}; + /** * Stream that is fed by subclasses of @ref reader. * @@ -756,13 +881,13 @@ class stream : protected stream_base /** * Add a new reader by passing its constructor arguments, excluding - * the initial @a stream argument. + * the initial @a io_service and @a owner arguments. */ template void emplace_reader(Args&&... args) { std::lock_guard lock(reader_mutex); - // See comments in stop_impl for why we do this check + // See comments in stop_received for why we do this check if (!stop_readers) { // Guarantee space before constructing the reader @@ -776,9 +901,9 @@ class stream : protected stream_base } /** - * Stop the stream and block until all the readers have wound up. After - * calling this there should be no more outstanding completion handlers - * in the thread pool. + * Stop the stream. After this returns, the io_service may still have + * outstanding completion handlers, but they should be no-ops when they're + * called. * * In most cases subclasses should override @ref stop_received rather than * this function. However, if @ref heap_ready can block indefinitely, this @@ -790,6 +915,16 @@ class stream : protected stream_base bool is_lossy() const; }; +/** + * Push packets found in a block of memory to a stream. Returns a pointer to + * after the last packet found in the stream. Processing stops as soon as + * after @ref decode_packet fails (because there is no way to find the next + * packet after a corrupt one), but packets may still be rejected by the stream. + * + * The stream is @em not stopped. + */ +const std::uint8_t *mem_to_stream(stream_base::add_packet_state &state, const std::uint8_t *ptr, std::size_t length); + /** * Push packets found in a block of memory to a stream. 
Returns a pointer to
 * after the last packet found in the stream. Processing stops as soon as
diff --git a/include/spead2/recv_tcp.h b/include/spead2/recv_tcp.h
index 38eb685b6..e7908fd8e 100644
--- a/include/spead2/recv_tcp.h
+++ b/include/spead2/recv_tcp.h
@@ -29,7 +29,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
@@ -44,10 +43,13 @@ namespace recv
 class tcp_reader : public reader
 {
 private:
-    /// The acceptor object
-    boost::asio::ip::tcp::acceptor acceptor;
-    /// TCP peer socket (i.e., the one connected to the remote end)
-    boost::asio::ip::tcp::socket peer;
+    /* The definition order is important here: the buffer must outlive the peer
+     * socket, so that the destructor cancels an asynchronous buffer read
+     * before the buffer is destroyed.
+     *
+     * Similarly, the acceptor must be destroyed before the peer.
+     */
+
     /// Maximum packet size we will accept. Needed mostly for the underlying packet deserialization logic
     std::size_t max_size;
     /// Buffer for packet data reception
@@ -63,15 +65,22 @@
     /// Number of packets to hold on each buffer for asynchronous receive
     static constexpr std::size_t pkts_per_buffer = 64;
 
+    /// TCP peer socket (i.e., the one connected to the remote end)
+    boost::asio::ip::tcp::socket peer;
+    /// The acceptor object
+    boost::asio::ip::tcp::acceptor acceptor;
+
     /// Start an asynchronous receive
     void enqueue_receive();
 
     /// Callback on completion of asynchronous accept
     void accept_handler(
+        stream_base::add_packet_state &state,
         const boost::system::error_code &error);
 
     /// Callback on completion of asynchronous receive
     void packet_handler(
+        stream_base::add_packet_state &state,
         const boost::system::error_code &error,
         std::size_t bytes_transferred);
 
@@ -140,8 +149,6 @@
         boost::asio::ip::tcp::acceptor &&acceptor,
         std::size_t max_size = default_max_size);
 
-    virtual void stop() override;
-
     virtual bool lossy() const override;
 };
 
diff --git a/include/spead2/recv_udp.h b/include/spead2/recv_udp.h
index a33dc82f9..87b3f7bac 100644
--- a/include/spead2/recv_udp.h
+++ b/include/spead2/recv_udp.h
@@ -1,4 +1,4 @@
-/* Copyright 2015, 2020 National Research Foundation (SARAO)
+/* Copyright 2015, 2020, 2023 National Research Foundation (SARAO)
  *
  * This program is free software: you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
@@ -31,7 +31,6 @@
 #endif
 #include
 #include
-#include
 #include
 #include
 
@@ -46,8 +45,10 @@ namespace recv
 class udp_reader : public udp_reader_base
 {
 private:
-    /// UDP socket we are listening on
-    boost::asio::ip::udp::socket socket;
+    /* Note: declaration order is important for correct destruction
+     * (the socket must be closed before we start destroying buffers).
+     */
+
     /// Unused, but need to provide the memory for asio to write to
     boost::asio::ip::udp::endpoint endpoint;
     /// Maximum packet size we will accept
@@ -63,12 +64,15 @@
     /// Buffer for asynchronous receive, of size @a max_size + 1.
std::unique_ptr buffer; #endif + /// UDP socket we are listening on + boost::asio::ip::udp::socket socket; /// Start an asynchronous receive void enqueue_receive(); /// Callback on completion of asynchronous receive void packet_handler( + stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred); @@ -166,8 +170,6 @@ class udp_reader : public udp_reader_base stream &owner, boost::asio::ip::udp::socket &&socket, std::size_t max_size = default_max_size); - - virtual void stop() override; }; /** diff --git a/include/spead2/recv_udp_base.h b/include/spead2/recv_udp_base.h index d5b0ef417..9c6c8716a 100644 --- a/include/spead2/recv_udp_base.h +++ b/include/spead2/recv_udp_base.h @@ -1,4 +1,4 @@ -/* Copyright 2016 National Research Foundation (SARAO) +/* Copyright 2016, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -23,7 +23,6 @@ #include #include -#include #include namespace spead2 diff --git a/include/spead2/recv_udp_ibv.h b/include/spead2/recv_udp_ibv.h index d7213c48f..c38c83039 100644 --- a/include/spead2/recv_udp_ibv.h +++ b/include/spead2/recv_udp_ibv.h @@ -1,4 +1,4 @@ -/* Copyright 2016, 2019-2020 National Research Foundation (SARAO) +/* Copyright 2016, 2019-2020, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -113,8 +112,6 @@ class udp_ibv_reader_core : public udp_reader_base const std::size_t max_size; ///< Number of times to poll before waiting const int max_poll; - /// Signals poll-mode to stop - std::atomic stop_poll; void join_groups(const std::vector &endpoints, const boost::asio::ip::address &interface_address); @@ -128,8 +125,6 @@ class udp_ibv_reader_core : public udp_reader_base udp_ibv_reader_core( stream &owner, const udp_ibv_config &config); - - virtual void stop() override; }; /** @@ -151,8 +146,10 @@ class udp_ibv_reader_base : public udp_ibv_reader_core * If @a consume_event is true, an event should be removed and consumed * from the completion channel. 
*/ - void packet_handler(const boost::system::error_code &error, - bool consume_event); + void packet_handler( + stream_base::add_packet_state &state, + const boost::system::error_code &error, + bool consume_event); /** * Request a callback when there is data (or as soon as possible, in @@ -164,11 +161,11 @@ class udp_ibv_reader_base : public udp_ibv_reader_core }; template -void udp_ibv_reader_base::packet_handler(const boost::system::error_code &error, - bool consume_event) +void udp_ibv_reader_base::packet_handler( + stream_base::add_packet_state &state, + const boost::system::error_code &error, + bool consume_event) { - stream_base::add_packet_state state(get_stream_base()); - bool need_poll = true; if (!error) { @@ -184,43 +181,34 @@ void udp_ibv_reader_base::packet_handler(const boost::system::error_cod static_cast(this)->recv_cq.ack_events(1); } } - if (state.is_stopped()) + for (int i = 0; i < max_poll; i++) { - log_info("UDP reader: discarding packet received after stream stopped"); - } - else - { - for (int i = 0; i < max_poll; i++) + if (comp_channel) { - if (comp_channel) + if (i == max_poll - 1) { - if (i == max_poll - 1) - { - /* We need to call req_notify_cq *before* the last - * poll_once, because notifications are edge-triggered. - * If we did it the other way around, there is a race - * where a new packet can arrive after poll_once but - * before req_notify_cq, failing to trigger a - * notification. - */ - static_cast(this)->recv_cq.req_notify(false); - need_poll = false; - } - } - else if (stop_poll.load()) - break; - poll_result result = static_cast(this)->poll_once(state); - if (result == poll_result::stopped) - break; - else if (result == poll_result::partial) - { - /* If we armed req_notify_cq but then didn't drain the CQ, and - * we get no more packets, then we won't get woken up again, so - * we need to poll again next time we go around the event loop. + /* We need to call req_notify_cq *before* the last + * poll_once, because notifications are edge-triggered. + * If we did it the other way around, there is a race + * where a new packet can arrive after poll_once but + * before req_notify_cq, failing to trigger a + * notification. */ - need_poll = true; + static_cast(this)->recv_cq.req_notify(false); + need_poll = false; } } + poll_result result = static_cast(this)->poll_once(state); + if (result == poll_result::stopped) + break; + else if (result == poll_result::partial) + { + /* If we armed req_notify_cq but then didn't drain the CQ, and + * we get no more packets, then we won't get woken up again, so + * we need to poll again next time we go around the event loop. 
+ */ + need_poll = true; + } } } else if (error != boost::asio::error::operation_aborted) @@ -230,8 +218,6 @@ void udp_ibv_reader_base::packet_handler(const boost::system::error_cod { enqueue_receive(need_poll); } - else - stopped(); } template @@ -243,14 +229,17 @@ void udp_ibv_reader_base::enqueue_receive(bool need_poll) // Asynchronous mode comp_channel_wrapper.async_read_some( boost::asio::null_buffers(), - std::bind(&udp_ibv_reader_base::packet_handler, this, _1, true)); + bind_handler(std::bind(&udp_ibv_reader_base::packet_handler, this, _1, _2, true))); } else { // Polling mode get_io_service().post( - std::bind(&udp_ibv_reader_base::packet_handler, this, - boost::system::error_code(), false)); + bind_handler( + std::bind(&udp_ibv_reader_base::packet_handler, this, _1, + boost::system::error_code(), false) + ) + ); } } diff --git a/include/spead2/recv_udp_ibv_mprq.h b/include/spead2/recv_udp_ibv_mprq.h index 82de26cdc..74a97da18 100644 --- a/include/spead2/recv_udp_ibv_mprq.h +++ b/include/spead2/recv_udp_ibv_mprq.h @@ -1,4 +1,4 @@ -/* Copyright 2019-2020 National Research Foundation (SARAO) +/* Copyright 2019-2020, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/include/spead2/recv_udp_pcap.h b/include/spead2/recv_udp_pcap.h index 1f273e1b6..f44fa7c17 100644 --- a/include/spead2/recv_udp_pcap.h +++ b/include/spead2/recv_udp_pcap.h @@ -1,4 +1,4 @@ -/* Copyright 2016-2017 National Research Foundation (SARAO) +/* Copyright 2016-2017, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -49,7 +48,7 @@ class udp_pcap_file_reader : public udp_reader_base pcap_t *handle; udp_unpacker udp_from_frame; - void run(); + void run(stream_base::add_packet_state &state); public: /** @@ -64,7 +63,6 @@ class udp_pcap_file_reader : public udp_reader_base udp_pcap_file_reader(stream &owner, const std::string &filename, const std::string &filter = ""); virtual ~udp_pcap_file_reader(); - virtual void stop() override; virtual bool lossy() const override; }; diff --git a/src/Makefile.am b/src/Makefile.am index 2690f8593..637238f9b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -82,7 +82,6 @@ libspead2_a_SOURCES = \ recv_live_heap.cpp \ recv_mem.cpp \ recv_packet.cpp \ - recv_reader.cpp \ recv_ring_stream.cpp \ recv_stream.cpp \ recv_tcp.cpp \ diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp index 237f28ec8..42a7bee0d 100644 --- a/src/recv_chunk_stream.cpp +++ b/src/recv_chunk_stream.cpp @@ -270,7 +270,7 @@ void chunk_stream::stop_received() void chunk_stream::stop() { { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(shared->queue_mutex); flush_chunks(); } stream::stop(); diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index ff24c1c12..2feaee90e 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -147,7 +147,7 @@ void chunk_stream_group_member::stop_received() void chunk_stream_group_member::stop() { { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(shared->queue_mutex); flush_chunks(); } stream::stop(); diff --git 
a/src/recv_inproc.cpp b/src/recv_inproc.cpp index 3a9a3ddee..ed1306eac 100644 --- a/src/recv_inproc.cpp +++ b/src/recv_inproc.cpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2019 National Research Foundation (SARAO) +/* Copyright 2018-2019, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -24,7 +24,7 @@ #include #include #include -#include +#include namespace spead2 { @@ -58,32 +58,25 @@ void inproc_reader::process_one_packet(stream_base::add_packet_state &state, } void inproc_reader::packet_handler( + stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred) { - stream_base::add_packet_state state(get_stream_base()); if (!error) { - if (state.is_stopped()) + try { - log_info("inproc reader: discarding packet received after stream stopped"); + inproc_queue::packet packet = queue->buffer.try_pop(); + process_one_packet(state, packet); + /* TODO: could grab a batch of packets to amortise costs */ } - else + catch (ringbuffer_stopped &) { - try - { - inproc_queue::packet packet = queue->buffer.try_pop(); - process_one_packet(state, packet); - /* TODO: could grab a batch of packets to amortise costs */ - } - catch (ringbuffer_stopped &) - { - state.stop(); - } - catch (ringbuffer_empty &) - { - // spurious wakeup - no action needed - } + state.stop(); + } + catch (ringbuffer_empty &) + { + // spurious wakeup - no action needed } } else if (error != boost::asio::error::operation_aborted) @@ -91,11 +84,6 @@ void inproc_reader::packet_handler( if (!state.is_stopped()) enqueue(); - else - { - data_sem_wrapper.close(); - stopped(); - } } void inproc_reader::enqueue() @@ -103,12 +91,7 @@ void inproc_reader::enqueue() using namespace std::placeholders; data_sem_wrapper.async_read_some( boost::asio::null_buffers(), - std::bind(&inproc_reader::packet_handler, this, _1, _2)); -} - -void inproc_reader::stop() -{ - data_sem_wrapper.close(); + bind_handler(std::bind(&inproc_reader::packet_handler, this, _1, _2, _3))); } bool inproc_reader::lossy() const diff --git a/src/recv_mem.cpp b/src/recv_mem.cpp index ccc90c1e0..bdf26e4b9 100644 --- a/src/recv_mem.cpp +++ b/src/recv_mem.cpp @@ -1,4 +1,4 @@ -/* Copyright 2015, 2019 National Research Foundation (SARAO) +/* Copyright 2015, 2019, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -35,13 +34,11 @@ mem_reader::mem_reader( : reader(owner), ptr(ptr), length(length) { assert(ptr != nullptr); - get_io_service().post([this] { - mem_to_stream(get_stream_base(), this->ptr, this->length); + get_io_service().post(bind_handler([this] (stream_base::add_packet_state &state) { + mem_to_stream(state, this->ptr, this->length); // There will be no more data, so we can stop the stream immediately. 
- stream_base::add_packet_state state(get_stream_base()); state.stop(); - stopped(); - }); + })); } bool mem_reader::lossy() const diff --git a/src/recv_reader.cpp b/src/recv_reader.cpp deleted file mode 100644 index b2f8e9da3..000000000 --- a/src/recv_reader.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright 2015, 2019 National Research Foundation (SARAO) - * - * This program is free software: you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . - */ - -/** - * @file - */ - -#include -#include - -namespace spead2 -{ -namespace recv -{ - -void reader::stopped() -{ - // Schedule it to run later so that at the time it occurs there are no - // further references to *this. - stream *owner_ptr = &owner; - get_io_service().post([owner_ptr] { owner_ptr->readers_stopped.put(); }); -} - -bool reader::lossy() const -{ - return true; -} - -boost::asio::io_service &reader::get_io_service() -{ - return owner.get_io_service(); -} - -stream_base &reader::get_stream_base() const -{ - return owner; -} - -} // namespace recv -} // namespace spead2 diff --git a/src/recv_stream.cpp b/src/recv_stream.cpp index 416f19aee..98d16e80d 100644 --- a/src/recv_stream.cpp +++ b/src/recv_stream.cpp @@ -1,4 +1,4 @@ -/* Copyright 2015, 2017-2021 National Research Foundation (SARAO) +/* Copyright 2015, 2017-2021, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -381,6 +381,7 @@ stream_base::stream_base(const stream_config &config) substreams(new substream[config.get_substreams() + 1]), substream_div(config.get_substreams()), config(config), + shared(std::make_shared(this)), stats(config.get_stats().size()), batch_stats(config.get_stats().size()) { @@ -439,35 +440,36 @@ void stream_base::unlink_entry(queue_entry *entry) entry->next = INVALID_ENTRY; } -stream_base::add_packet_state::add_packet_state(stream_base &owner) - : owner(owner), lock(owner.queue_mutex) +stream_base::add_packet_state::add_packet_state(shared_state &owner) + : lock(owner.queue_mutex), owner(owner.self) { - std::fill(owner.batch_stats.begin(), owner.batch_stats.end(), 0); + if (this->owner) + std::fill(this->owner->batch_stats.begin(), this->owner->batch_stats.end(), 0); } stream_base::add_packet_state::~add_packet_state() { - if (!packets && is_stopped()) + if (!owner || (!packets && is_stopped())) return; // Stream was stopped before we could do anything - don't count as a batch - std::lock_guard stats_lock(owner.stats_mutex); + std::lock_guard stats_lock(owner->stats_mutex); // The built-in stats are updated directly; batch_stats is not used - owner.stats[stream_stat_indices::packets] += packets; - owner.stats[stream_stat_indices::batches]++; - owner.stats[stream_stat_indices::heaps] += complete_heaps + incomplete_heaps_evicted; - owner.stats[stream_stat_indices::incomplete_heaps_evicted] += incomplete_heaps_evicted; - 
owner.stats[stream_stat_indices::single_packet_heaps] += single_packet_heaps; - owner.stats[stream_stat_indices::search_dist] += search_dist; - auto &owner_max_batch = owner.stats[stream_stat_indices::max_batch]; + owner->stats[stream_stat_indices::packets] += packets; + owner->stats[stream_stat_indices::batches]++; + owner->stats[stream_stat_indices::heaps] += complete_heaps + incomplete_heaps_evicted; + owner->stats[stream_stat_indices::incomplete_heaps_evicted] += incomplete_heaps_evicted; + owner->stats[stream_stat_indices::single_packet_heaps] += single_packet_heaps; + owner->stats[stream_stat_indices::search_dist] += search_dist; + auto &owner_max_batch = owner->stats[stream_stat_indices::max_batch]; owner_max_batch = std::max(owner_max_batch, packets); // Update custom statistics - const auto &stats_config = owner.get_config().get_stats(); + const auto &stats_config = owner->get_config().get_stats(); for (std::size_t i = stream_stat_indices::custom; i < stats_config.size(); i++) - owner.stats[i] = stats_config[i].combine(owner.stats[i], owner.batch_stats[i]); + owner->stats[i] = stats_config[i].combine(owner->stats[i], owner->batch_stats[i]); } bool stream_base::add_packet(add_packet_state &state, const packet_header &packet) { - const stream_config &config = state.owner.get_config(); + const stream_config &config = state.owner->get_config(); assert(!stopped); state.packets++; if (packet.heap_length < 0 && !config.get_allow_unsized_heaps()) @@ -585,7 +587,7 @@ void stream_base::flush_unlocked() void stream_base::flush() { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(shared->queue_mutex); flush_unlocked(); } @@ -597,7 +599,7 @@ void stream_base::stop_unlocked() void stream_base::stop() { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(shared->queue_mutex); stop_unlocked(); } @@ -605,6 +607,7 @@ void stream_base::stop_received() { assert(!stopped); stopped = true; + shared->self = nullptr; flush_unlocked(); } @@ -616,6 +619,17 @@ stream_stats stream_base::get_stats() const } +reader::reader(stream &owner) + : io_service(owner.get_io_service()), owner(owner.shared) +{ +} + +bool reader::lossy() const +{ + return true; +} + + stream::stream(io_service_ref io_service, const stream_config &config) : stream_base(config), thread_pool_holder(std::move(io_service).get_shared_thread_pool()), @@ -627,38 +641,18 @@ void stream::stop_received() { stream_base::stop_received(); std::lock_guard lock(reader_mutex); - for (const auto &reader : readers) - reader->stop(); + readers.clear(); + /* This ensures that once we clear out the readers, any future call to + * emplace_reader will silently be ignored. This avoids issues if there + * is a race between the user calling emplace_reader and a stop packet + * in the stream. + */ + stop_readers = true; } void stream::stop_impl() { stream_base::stop(); - - std::size_t n_readers; - { - std::lock_guard lock(reader_mutex); - /* Prevent any further calls to emplace_reader from doing anything, so - * that n_readers will remain accurate. - */ - stop_readers = true; - n_readers = readers.size(); - } - - // Wait until all readers have wound up all their completion handlers - while (n_readers > 0) - { - semaphore_get(readers_stopped); - n_readers--; - } - - { - /* This lock is not strictly needed since no other thread can touch - * readers any more, but is harmless. 
- */ - std::lock_guard lock(reader_mutex); - readers.clear(); - } } void stream::stop() @@ -678,9 +672,8 @@ stream::~stream() } -const std::uint8_t *mem_to_stream(stream_base &s, const std::uint8_t *ptr, std::size_t length) +const std::uint8_t *mem_to_stream(stream_base::add_packet_state &state, const std::uint8_t *ptr, std::size_t length) { - stream_base::add_packet_state state(s); while (length > 0 && !state.is_stopped()) { packet_header packet; @@ -697,5 +690,11 @@ const std::uint8_t *mem_to_stream(stream_base &s, const std::uint8_t *ptr, std:: return ptr; } +const std::uint8_t *mem_to_stream(stream_base &s, const std::uint8_t *ptr, std::size_t length) +{ + stream_base::add_packet_state state(s); + return mem_to_stream(state, ptr, length); +} + } // namespace recv } // namespace spead2 diff --git a/src/recv_tcp.cpp b/src/recv_tcp.cpp index 50fe64d0c..41e435ef1 100644 --- a/src/recv_tcp.cpp +++ b/src/recv_tcp.cpp @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -50,17 +50,25 @@ tcp_reader::tcp_reader( boost::asio::ip::tcp::acceptor &&acceptor, std::size_t max_size, std::size_t buffer_size) - : reader(owner), acceptor(std::move(acceptor)), - peer(get_socket_io_service(this->acceptor)), + : reader(owner), max_size(max_size), buffer(new std::uint8_t[max_size * pkts_per_buffer]), head(buffer.get()), - tail(buffer.get()) + tail(buffer.get()), + peer(get_socket_io_service(acceptor)), + acceptor(std::move(acceptor)) { assert(socket_uses_io_service(this->acceptor, get_io_service())); set_socket_recv_buffer_size(this->acceptor, buffer_size); - this->acceptor.async_accept(peer, - std::bind(&tcp_reader::accept_handler, this, std::placeholders::_1)); + /* We need to hold the stream's queue_mutex, because that guards access + * to the sockets. This is a heavy-weight way to do it, but since it + * only happens once per connection it is probably not worth trying to + * add a lighter-weight interface to stream. + */ + using namespace std::placeholders; + this->acceptor.async_accept( + peer, + bind_handler(std::bind(&tcp_reader::accept_handler, this, _1, _2))); } tcp_reader::tcp_reader( @@ -84,11 +92,10 @@ tcp_reader::tcp_reader( } void tcp_reader::packet_handler( + stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred) { - stream_base::add_packet_state state(get_stream_base()); - bool read_more = false; if (!error) { @@ -107,11 +114,6 @@ void tcp_reader::packet_handler( if (read_more) enqueue_receive(); - else - { - peer.close(); - stopped(); - } } bool tcp_reader::parse_packet(stream_base::add_packet_state &state) @@ -214,15 +216,8 @@ bool tcp_reader::skip_bytes() return to_skip > 0; } -void tcp_reader::accept_handler(const boost::system::error_code &error) +void tcp_reader::accept_handler(stream_base::add_packet_state &state, const boost::system::error_code &error) { - /* We need to hold the stream's queue_mutex, because that guards access - * to the sockets. This is a heavy-weight way to do it, but since it - * only happens once per connection it is probably not worth trying to - * add a lighter-weight interface to @c stream. 
- */ - stream_base::add_packet_state state(get_stream_base()); - acceptor.close(); if (!error) enqueue_receive(); @@ -230,7 +225,6 @@ void tcp_reader::accept_handler(const boost::system::error_code &error) { if (error != boost::asio::error::operation_aborted) log_warning("Error in TCP accept: %1%", error.message()); - stopped(); } } @@ -254,20 +248,7 @@ void tcp_reader::enqueue_receive() peer.async_receive( boost::asio::buffer(tail, bufsize - (tail - buf)), - std::bind(&tcp_reader::packet_handler, this, _1, _2)); -} - -void tcp_reader::stop() -{ - /* asio guarantees that closing a socket will cancel any pending - * operations on it. - * Don't put any logging here: it could be running in a shutdown - * path where it is no longer safe to do so. - */ - if (peer.is_open()) - peer.close(); - if (acceptor.is_open()) - acceptor.close(); + bind_handler(std::bind(&tcp_reader::packet_handler, this, _1, _2, _3))); } bool tcp_reader::lossy() const diff --git a/src/recv_udp.cpp b/src/recv_udp.cpp index 473058fb1..d4c588627 100644 --- a/src/recv_udp.cpp +++ b/src/recv_udp.cpp @@ -1,4 +1,4 @@ -/* Copyright 2015, 2019-2020 National Research Foundation (SARAO) +/* Copyright 2015, 2019-2020, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -63,12 +63,13 @@ udp_reader::udp_reader( stream &owner, boost::asio::ip::udp::socket &&socket, std::size_t max_size) - : udp_reader_base(owner), socket(std::move(socket)), max_size(max_size), + : udp_reader_base(owner), max_size(max_size), #if SPEAD2_USE_RECVMMSG - buffer(mmsg_count), iov(mmsg_count), msgvec(mmsg_count) + buffer(mmsg_count), iov(mmsg_count), msgvec(mmsg_count), #else - buffer(new std::uint8_t[max_size + 1]) + buffer(new std::uint8_t[max_size + 1]), #endif + socket(std::move(socket)) { assert(socket_uses_io_service(this->socket, get_io_service())); #if SPEAD2_USE_RECVMMSG @@ -182,38 +183,31 @@ udp_reader::udp_reader( } void udp_reader::packet_handler( + stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred) { - stream_base::add_packet_state state(get_stream_base()); if (!error) { - if (state.is_stopped()) +#if SPEAD2_USE_RECVMMSG + int received = recvmmsg(socket.native_handle(), msgvec.data(), msgvec.size(), + MSG_DONTWAIT, nullptr); + log_debug("recvmmsg returned %1%", received); + if (received == -1 && errno != EAGAIN && errno != EWOULDBLOCK) { - log_info("UDP reader: discarding packet received after stream stopped"); + std::error_code code(errno, std::system_category()); + log_warning("recvmmsg failed: %1% (%2%)", code.value(), code.message()); } - else + for (int i = 0; i < received; i++) { -#if SPEAD2_USE_RECVMMSG - int received = recvmmsg(socket.native_handle(), msgvec.data(), msgvec.size(), - MSG_DONTWAIT, nullptr); - log_debug("recvmmsg returned %1%", received); - if (received == -1 && errno != EAGAIN && errno != EWOULDBLOCK) - { - std::error_code code(errno, std::system_category()); - log_warning("recvmmsg failed: %1% (%2%)", code.value(), code.message()); - } - for (int i = 0; i < received; i++) - { - bool stopped = process_one_packet(state, - buffer[i].get(), msgvec[i].msg_len, max_size); - if (stopped) - break; - } + bool stopped = process_one_packet(state, + buffer[i].get(), msgvec[i].msg_len, max_size); + if (stopped) + break; + } #else - 
process_one_packet(state, buffer.get(), bytes_transferred, max_size); + process_one_packet(state, buffer.get(), bytes_transferred, max_size); #endif - } } else if (error != boost::asio::error::operation_aborted) log_warning("Error in UDP receiver: %1%", error.message()); @@ -222,10 +216,6 @@ void udp_reader::packet_handler( { enqueue_receive(); } - else - { - stopped(); - } } void udp_reader::enqueue_receive() @@ -238,17 +228,7 @@ void udp_reader::enqueue_receive() boost::asio::buffer(buffer.get(), max_size + 1), #endif endpoint, - std::bind(&udp_reader::packet_handler, this, _1, _2)); -} - -void udp_reader::stop() -{ - /* asio guarantees that closing a socket will cancel any pending - * operations on it. - * Don't put any logging here: it could be running in a shutdown - * path where it is no longer safe to do so. - */ - socket.close(); + bind_handler(std::bind(&udp_reader::packet_handler, this, _1, _2, _3))); } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/recv_udp_base.cpp b/src/recv_udp_base.cpp index f848a0296..913d3424a 100644 --- a/src/recv_udp_base.cpp +++ b/src/recv_udp_base.cpp @@ -1,4 +1,4 @@ -/* Copyright 2016, 2019 National Research Foundation (SARAO) +/* Copyright 2016, 2019, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/src/recv_udp_ibv.cpp b/src/recv_udp_ibv.cpp index 58e9e08f0..13d0ee22b 100644 --- a/src/recv_udp_ibv.cpp +++ b/src/recv_udp_ibv.cpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 National Research Foundation (SARAO) +/* Copyright 2016-2020, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -89,8 +88,7 @@ udp_ibv_reader_core::udp_ibv_reader_core( event_channel(nullptr), comp_channel_wrapper(owner.get_io_service()), max_size(config.get_max_size()), - max_poll(config.get_max_poll()), - stop_poll(false) + max_poll(config.get_max_poll()) { if (config.get_endpoints().empty()) throw std::invalid_argument("endpoints is empty"); @@ -120,14 +118,6 @@ void udp_ibv_reader_core::join_groups( } } -void udp_ibv_reader_core::stop() -{ - if (comp_channel) - comp_channel_wrapper.close(); - else - stop_poll = true; -} - } // namespace detail static std::size_t compute_n_slots(const rdma_cm_id_t &cm_id, std::size_t buffer_size, diff --git a/src/recv_udp_pcap.cpp b/src/recv_udp_pcap.cpp index 4b292e187..ed9076800 100644 --- a/src/recv_udp_pcap.cpp +++ b/src/recv_udp_pcap.cpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2017, 2019 National Research Foundation (SARAO) +/* Copyright 2016-2017, 2019, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -44,11 +43,10 @@ namespace spead2 namespace recv { -void udp_pcap_file_reader::run() +void udp_pcap_file_reader::run(stream_base::add_packet_state &state) { const int BATCH = 64; // maximum number of packets to process in one go - spead2::recv::stream_base::add_packet_state state(get_stream_base()); for 
(int pass = 0; pass < BATCH; pass++) { if (state.is_stopped()) @@ -93,9 +91,7 @@ void udp_pcap_file_reader::run() } // Run ourselves again if (!state.is_stopped()) - get_io_service().post([this] { run(); }); - else - stopped(); + get_io_service().post(bind_handler([this] (stream_base::add_packet_state &state) { run(state); })); } udp_pcap_file_reader::udp_pcap_file_reader(stream &owner, const std::string &filename, const std::string &user_filter) @@ -130,7 +126,7 @@ udp_pcap_file_reader::udp_pcap_file_reader(stream &owner, const std::string &fil udp_from_frame = (linktype == DLT_EN10MB) ? udp_from_ethernet : udp_from_linux_sll; // Process the file - get_io_service().post([this] { run(); }); + get_io_service().post(bind_handler([this] (stream_base::add_packet_state &state) { run(state); })); } udp_pcap_file_reader::~udp_pcap_file_reader() @@ -139,10 +135,6 @@ udp_pcap_file_reader::~udp_pcap_file_reader() pcap_close(handle); } -void udp_pcap_file_reader::stop() -{ -} - bool udp_pcap_file_reader::lossy() const { return false; From 5befeaa7e592a02b31e95a84b5cffae6f921e638 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Wed, 21 Jun 2023 14:49:46 +0200 Subject: [PATCH 07/74] Eliminate some std::shared_ptr refcount twiddling Readers typically have a chain of completion handlers, each of which schedules the next. Previously, each completion handler would create a new copy of the std::shared_ptr, which would cause atomic refcount twiddling potentially for every packet. Instead, encapsulate the shared_ptr in an opaque `handler_context` structure which gets passed along the chain. The `handler_context` is marked as non-copyable, to ensure that we don't accidentally inc and dec the ref-count unnecessarily. This revealed that using the legacy `io_context::post` function doesn't support non-copyable completion handlers, and we had to use `boost::asio::post` instead. That means Boost 1.66 will be the minimum version. --- include/spead2/recv_inproc.h | 3 +- include/spead2/recv_stream.h | 79 ++++++++++++++++++++++++++++++---- include/spead2/recv_tcp.h | 4 +- include/spead2/recv_udp.h | 3 +- include/spead2/recv_udp_ibv.h | 18 +++++--- include/spead2/recv_udp_pcap.h | 2 +- src/recv_inproc.cpp | 9 ++-- src/recv_mem.cpp | 13 +++--- src/recv_tcp.cpp | 13 +++--- src/recv_udp.cpp | 9 ++-- src/recv_udp_ibv.cpp | 2 +- src/recv_udp_ibv_mprq.cpp | 2 +- src/recv_udp_pcap.cpp | 11 +++-- 13 files changed, 126 insertions(+), 42 deletions(-) diff --git a/include/spead2/recv_inproc.h b/include/spead2/recv_inproc.h index 4a05d39aa..a44af7ae1 100644 --- a/include/spead2/recv_inproc.h +++ b/include/spead2/recv_inproc.h @@ -43,10 +43,11 @@ class inproc_reader : public reader void process_one_packet(stream_base::add_packet_state &state, const inproc_queue::packet &packet); void packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_received); - void enqueue(); + void enqueue(handler_context ctx); public: /// Constructor. diff --git a/include/spead2/recv_stream.h b/include/spead2/recv_stream.h index 9306196a9..c72e0e0e1 100644 --- a/include/spead2/recv_stream.h +++ b/include/spead2/recv_stream.h @@ -752,7 +752,9 @@ class stream_base * handled. Since destruction may happen on a separate thread to the one * running in-flight handlers, care must be taken not to access the stream or * the reader after the stream is stopped. In many cases this can be - * facilitated using @ref bind_handler. 
+ * facilitated using @ref bind_handler, although it is still important to + * re-check whether the stream has stopped after calling + * @ref stream_base::add_packet_state::add_packet. */ class reader { @@ -761,33 +763,94 @@ class reader std::shared_ptr owner; ///< Access to owning stream protected: + class handler_context + { + friend class reader; + private: + std::shared_ptr owner; + + public: + explicit handler_context(std::shared_ptr owner) + : owner(std::move(owner)) + { + assert(this->owner); + } + + // Whether the context is still valid + explicit operator bool() const noexcept { return bool(owner); } + bool operator!() const noexcept { return !owner; } + + /* Prevent copy construction and assignment. They're perfectly safe, + * but potentially slow (atomic reference count manipulation) so + * they're disabled to prevent them being used by accident. + */ + handler_context(handler_context &) = delete; + handler_context &operator=(handler_context &) = delete; + handler_context(handler_context &&) = default; + handler_context &operator=(handler_context &&) = default; + }; + template class bound_handler { private: - std::shared_ptr owner; + handler_context ctx; T orig; public: template - bound_handler(std::shared_ptr owner, U &&orig) - : owner(std::move(owner)), orig(std::forward(orig)) + bound_handler(handler_context ctx, U &&orig) + : ctx(std::move(ctx)), orig(std::forward(orig)) { } template void operator()(Args&&... args) { - stream_base::add_packet_state state(*owner); + // Note: because we give away our shared pointer, this can only be + // called once. Fortunately, asio makes that guarantee. + assert(ctx); + stream_base::add_packet_state state(*ctx.owner); if (!state.is_stopped()) - orig(state, std::forward(args)...); + orig(std::move(ctx), state, std::forward(args)...); } }; + handler_context make_handler_context() const + { + return handler_context(owner); + } + + /** + * Wrap a function object to manage locking and lifetime. This is intended + * to be used to bind a completion handler. The wrapper handler is called + * with extra arguments prefixed, so it should have the signature + * void handler(handler_context ctx, stream_base::add_packet_state &state, ...); + * + * The @ref handler_context can be passed (by rvalue + * reference) to a single call to @ref bind_handler, which is cheaper + * than the overload that doesn't take it (it avoids manipulating reference + * counts on a @c std::shared_ptr). + * + * At the time the wrapped handler is invoked, the stream is guaranteed to still + * exist and not yet have been stopped. After calling + * @ref stream_base::add_packet_state::add_packet one must again check whether + * the stream has been stopped, as this can cause the reader to be destroyed. + */ + template + bound_handler::type> bind_handler(T &&handler) const + { + return bind_handler(make_handler_context(), std::forward(handler)); + } + + /** + * Overload that takes an existing @ref handler_context. 
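+     *
+     * A typical reader keeps a single context alive along its handler chain
+     * (sketch only; my_reader, socket, buf and size are hypothetical):
+     * @code
+     * void my_reader::enqueue(handler_context ctx)
+     * {
+     *     using namespace std::placeholders;
+     *     socket.async_receive(
+     *         boost::asio::buffer(buf, size),
+     *         bind_handler(std::move(ctx),
+     *                      std::bind(&my_reader::packet_handler, this, _1, _2, _3, _4)));
+     * }
+     *
+     * void my_reader::packet_handler(
+     *     handler_context ctx, stream_base::add_packet_state &state,
+     *     const boost::system::error_code &error, std::size_t bytes_transferred)
+     * {
+     *     // ... feed packets to state.add_packet(...) ...
+     *     if (!state.is_stopped())
+     *         enqueue(std::move(ctx));  // hand the same context to the next hop
+     * }
+     * @endcode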
+ */ template - bound_handler bind_handler(T &&handler) const + bound_handler::type> bind_handler(handler_context ctx, T &&handler) const { - return bound_handler::type>(owner, std::forward(handler)); + assert(ctx); // make sure it hasn't already been used + return bound_handler::type>(std::move(ctx), std::forward(handler)); } public: diff --git a/include/spead2/recv_tcp.h b/include/spead2/recv_tcp.h index e7908fd8e..b1167504c 100644 --- a/include/spead2/recv_tcp.h +++ b/include/spead2/recv_tcp.h @@ -71,15 +71,17 @@ class tcp_reader : public reader boost::asio::ip::tcp::acceptor acceptor; /// Start an asynchronous receive - void enqueue_receive(); + void enqueue_receive(handler_context ctx); /// Callback on completion of asynchronous accept void accept_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error); /// Callback on completion of asynchronous receive void packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred); diff --git a/include/spead2/recv_udp.h b/include/spead2/recv_udp.h index 87b3f7bac..c9164bee1 100644 --- a/include/spead2/recv_udp.h +++ b/include/spead2/recv_udp.h @@ -68,10 +68,11 @@ class udp_reader : public udp_reader_base boost::asio::ip::udp::socket socket; /// Start an asynchronous receive - void enqueue_receive(); + void enqueue_receive(handler_context ctx); /// Callback on completion of asynchronous receive void packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred); diff --git a/include/spead2/recv_udp_ibv.h b/include/spead2/recv_udp_ibv.h index c38c83039..91b5b88c7 100644 --- a/include/spead2/recv_udp_ibv.h +++ b/include/spead2/recv_udp_ibv.h @@ -147,6 +147,7 @@ class udp_ibv_reader_base : public udp_ibv_reader_core * from the completion channel. */ void packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, bool consume_event); @@ -155,13 +156,14 @@ class udp_ibv_reader_base : public udp_ibv_reader_core * Request a callback when there is data (or as soon as possible, in * polling mode or when @a need_poll is true). 
*/ - void enqueue_receive(bool needs_poll); + void enqueue_receive(handler_context ctx, bool needs_poll); using udp_ibv_reader_core::udp_ibv_reader_core; }; template void udp_ibv_reader_base::packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, bool consume_event) @@ -216,12 +218,12 @@ void udp_ibv_reader_base::packet_handler( if (!state.is_stopped()) { - enqueue_receive(need_poll); + enqueue_receive(std::move(ctx), need_poll); } } template -void udp_ibv_reader_base::enqueue_receive(bool need_poll) +void udp_ibv_reader_base::enqueue_receive(handler_context ctx, bool need_poll) { using namespace std::placeholders; if (comp_channel && !need_poll) @@ -229,14 +231,18 @@ void udp_ibv_reader_base::enqueue_receive(bool need_poll) // Asynchronous mode comp_channel_wrapper.async_read_some( boost::asio::null_buffers(), - bind_handler(std::bind(&udp_ibv_reader_base::packet_handler, this, _1, _2, true))); + bind_handler( + std::move(ctx), + std::bind(&udp_ibv_reader_base::packet_handler, this, _1, _2, _3, true))); } else { // Polling mode - get_io_service().post( + boost::asio::post( + get_io_service(), bind_handler( - std::bind(&udp_ibv_reader_base::packet_handler, this, _1, + std::move(ctx), + std::bind(&udp_ibv_reader_base::packet_handler, this, _1, _2, boost::system::error_code(), false) ) ); diff --git a/include/spead2/recv_udp_pcap.h b/include/spead2/recv_udp_pcap.h index f44fa7c17..1d7071af8 100644 --- a/include/spead2/recv_udp_pcap.h +++ b/include/spead2/recv_udp_pcap.h @@ -48,7 +48,7 @@ class udp_pcap_file_reader : public udp_reader_base pcap_t *handle; udp_unpacker udp_from_frame; - void run(stream_base::add_packet_state &state); + void run(handler_context ctx, stream_base::add_packet_state &state); public: /** diff --git a/src/recv_inproc.cpp b/src/recv_inproc.cpp index ed1306eac..6a64733e7 100644 --- a/src/recv_inproc.cpp +++ b/src/recv_inproc.cpp @@ -39,7 +39,7 @@ inproc_reader::inproc_reader( data_sem_wrapper(wrap_fd(owner.get_io_service(), this->queue->buffer.get_data_sem().get_fd())) { - enqueue(); + enqueue(make_handler_context()); } void inproc_reader::process_one_packet(stream_base::add_packet_state &state, @@ -58,6 +58,7 @@ void inproc_reader::process_one_packet(stream_base::add_packet_state &state, } void inproc_reader::packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred) @@ -83,15 +84,15 @@ void inproc_reader::packet_handler( log_warning("Error in inproc receiver: %1%", error.message()); if (!state.is_stopped()) - enqueue(); + enqueue(std::move(ctx)); } -void inproc_reader::enqueue() +void inproc_reader::enqueue(handler_context ctx) { using namespace std::placeholders; data_sem_wrapper.async_read_some( boost::asio::null_buffers(), - bind_handler(std::bind(&inproc_reader::packet_handler, this, _1, _2, _3))); + bind_handler(std::move(ctx), std::bind(&inproc_reader::packet_handler, this, _1, _2, _3, _4))); } bool inproc_reader::lossy() const diff --git a/src/recv_mem.cpp b/src/recv_mem.cpp index bdf26e4b9..3b69c4252 100644 --- a/src/recv_mem.cpp +++ b/src/recv_mem.cpp @@ -34,11 +34,14 @@ mem_reader::mem_reader( : reader(owner), ptr(ptr), length(length) { assert(ptr != nullptr); - get_io_service().post(bind_handler([this] (stream_base::add_packet_state &state) { - mem_to_stream(state, this->ptr, this->length); - // There will be no more data, so we can stop the stream immediately. 
- state.stop(); - })); + boost::asio::post( + get_io_service(), + bind_handler([this] (handler_context ctx, stream_base::add_packet_state &state) { + mem_to_stream(state, this->ptr, this->length); + // There will be no more data, so we can stop the stream immediately. + state.stop(); + }) + ); } bool mem_reader::lossy() const diff --git a/src/recv_tcp.cpp b/src/recv_tcp.cpp index 41e435ef1..869304f0d 100644 --- a/src/recv_tcp.cpp +++ b/src/recv_tcp.cpp @@ -68,7 +68,7 @@ tcp_reader::tcp_reader( using namespace std::placeholders; this->acceptor.async_accept( peer, - bind_handler(std::bind(&tcp_reader::accept_handler, this, _1, _2))); + bind_handler(std::bind(&tcp_reader::accept_handler, this, _1, _2, _3))); } tcp_reader::tcp_reader( @@ -92,6 +92,7 @@ tcp_reader::tcp_reader( } void tcp_reader::packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred) @@ -113,7 +114,7 @@ void tcp_reader::packet_handler( log_warning("Error in TCP receiver: %1%", error.message()); if (read_more) - enqueue_receive(); + enqueue_receive(std::move(ctx)); } bool tcp_reader::parse_packet(stream_base::add_packet_state &state) @@ -216,11 +217,11 @@ bool tcp_reader::skip_bytes() return to_skip > 0; } -void tcp_reader::accept_handler(stream_base::add_packet_state &state, const boost::system::error_code &error) +void tcp_reader::accept_handler(handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error) { acceptor.close(); if (!error) - enqueue_receive(); + enqueue_receive(std::move(ctx)); else { if (error != boost::asio::error::operation_aborted) @@ -228,7 +229,7 @@ void tcp_reader::accept_handler(stream_base::add_packet_state &state, const boos } } -void tcp_reader::enqueue_receive() +void tcp_reader::enqueue_receive(handler_context ctx) { using namespace std::placeholders; @@ -248,7 +249,7 @@ void tcp_reader::enqueue_receive() peer.async_receive( boost::asio::buffer(tail, bufsize - (tail - buf)), - bind_handler(std::bind(&tcp_reader::packet_handler, this, _1, _2, _3))); + bind_handler(std::move(ctx), std::bind(&tcp_reader::packet_handler, this, _1, _2, _3, _4))); } bool tcp_reader::lossy() const diff --git a/src/recv_udp.cpp b/src/recv_udp.cpp index d4c588627..1b18ff70d 100644 --- a/src/recv_udp.cpp +++ b/src/recv_udp.cpp @@ -85,7 +85,7 @@ udp_reader::udp_reader( } #endif - enqueue_receive(); + enqueue_receive(make_handler_context()); } static boost::asio::ip::udp::socket make_bound_v4_socket( @@ -183,6 +183,7 @@ udp_reader::udp_reader( } void udp_reader::packet_handler( + handler_context ctx, stream_base::add_packet_state &state, const boost::system::error_code &error, std::size_t bytes_transferred) @@ -214,11 +215,11 @@ void udp_reader::packet_handler( if (!state.is_stopped()) { - enqueue_receive(); + enqueue_receive(std::move(ctx)); } } -void udp_reader::enqueue_receive() +void udp_reader::enqueue_receive(handler_context ctx) { using namespace std::placeholders; socket.async_receive_from( @@ -228,7 +229,7 @@ void udp_reader::enqueue_receive() boost::asio::buffer(buffer.get(), max_size + 1), #endif endpoint, - bind_handler(std::bind(&udp_reader::packet_handler, this, _1, _2, _3))); + bind_handler(std::move(ctx), std::bind(&udp_reader::packet_handler, this, _1, _2, _3, _4))); } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/recv_udp_ibv.cpp b/src/recv_udp_ibv.cpp index 13d0ee22b..1c3134dae 100644 --- a/src/recv_udp_ibv.cpp +++ 
b/src/recv_udp_ibv.cpp @@ -265,7 +265,7 @@ udp_ibv_reader::udp_ibv_reader( qp.post_recv(&slots[i].wr); } - enqueue_receive(true); + enqueue_receive(make_handler_context(), true); qp.modify(IBV_QPS_RTR); join_groups(config.get_endpoints(), config.get_interface_address()); } diff --git a/src/recv_udp_ibv_mprq.cpp b/src/recv_udp_ibv_mprq.cpp index 45da47479..356384c36 100644 --- a/src/recv_udp_ibv_mprq.cpp +++ b/src/recv_udp_ibv_mprq.cpp @@ -226,7 +226,7 @@ udp_ibv_mprq_reader::udp_ibv_mprq_reader( post_wr(i * wqe_size); flows = create_flows(qp, config.get_endpoints(), cm_id->port_num); - enqueue_receive(true); + enqueue_receive(make_handler_context(), true); join_groups(config.get_endpoints(), config.get_interface_address()); } diff --git a/src/recv_udp_pcap.cpp b/src/recv_udp_pcap.cpp index ed9076800..5f484ef01 100644 --- a/src/recv_udp_pcap.cpp +++ b/src/recv_udp_pcap.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,7 @@ namespace spead2 namespace recv { -void udp_pcap_file_reader::run(stream_base::add_packet_state &state) +void udp_pcap_file_reader::run(handler_context ctx, stream_base::add_packet_state &state) { const int BATCH = 64; // maximum number of packets to process in one go @@ -91,7 +92,10 @@ void udp_pcap_file_reader::run(stream_base::add_packet_state &state) } // Run ourselves again if (!state.is_stopped()) - get_io_service().post(bind_handler([this] (stream_base::add_packet_state &state) { run(state); })); + { + using namespace std::placeholders; + boost::asio::post(get_io_service(), bind_handler(std::move(ctx), std::bind(&udp_pcap_file_reader::run, this, _1, _2))); + } } udp_pcap_file_reader::udp_pcap_file_reader(stream &owner, const std::string &filename, const std::string &user_filter) @@ -126,7 +130,8 @@ udp_pcap_file_reader::udp_pcap_file_reader(stream &owner, const std::string &fil udp_from_frame = (linktype == DLT_EN10MB) ? udp_from_ethernet : udp_from_linux_sll; // Process the file - get_io_service().post(bind_handler([this] (stream_base::add_packet_state &state) { run(state); })); + using namespace std::placeholders; + boost::asio::post(get_io_service(), bind_handler(std::bind(&udp_pcap_file_reader::run, this, _1, _2))); } udp_pcap_file_reader::~udp_pcap_file_reader() From c24dddd3195cc923a778b713d35dfa858feac82a Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Wed, 21 Jun 2023 15:52:15 +0200 Subject: [PATCH 08/74] Fix signedness of a chunk_id parameter --- include/spead2/recv_chunk_stream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index e52df9033..dabeba284 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -246,7 +246,7 @@ class chunk_window */ template chunk *get_chunk( - std::uint64_t chunk_id, std::uintptr_t stream_id, const F1 &allocate_chunk, const F2 &ready_chunk) + std::int64_t chunk_id, std::uintptr_t stream_id, const F1 &allocate_chunk, const F2 &ready_chunk) { const std::size_t max_chunks = chunks.size(); if (chunk_id >= head_chunk) From 3e1735cf9720e4062f73d2c4bee288204b6d4ed6 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Wed, 21 Jun 2023 15:58:16 +0200 Subject: [PATCH 09/74] Change chunk graveyard to a linked list The linked list pointers are stored inside the chunks themselves. This allows the graveyard to grow arbitrarily large without incurring any memory allocations, which could throw during a destructor. 
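
To make the technique concrete, here is a minimal self-contained sketch
(illustrative `node`/`graveyard_list` names, not the actual spead2 types):
the owning pointer to the next element is embedded in the node itself, so
pushing onto the graveyard is just two pointer moves and can be noexcept,
and teardown frees the whole list without ever allocating.

    #include <cassert>
    #include <memory>
    #include <utility>

    struct node
    {
        int payload;
        std::unique_ptr<node> graveyard_next;   // link lives inside the node

        explicit node(int payload) : payload(payload) {}
    };

    class graveyard_list
    {
    private:
        std::unique_ptr<node> head;

    public:
        // Adding never allocates, so it cannot throw.
        void add(std::unique_ptr<node> &&n) noexcept
        {
            assert(!n->graveyard_next);
            n->graveyard_next = std::move(head);
            head = std::move(n);
        }

        // Free everything iteratively: letting ~node recurse down the list
        // could overflow the stack if the graveyard has grown very large.
        void clear() noexcept
        {
            while (head)
                head = std::move(head->graveyard_next);
        }

        ~graveyard_list() { clear(); }
    };

    int main()
    {
        graveyard_list g;
        for (int i = 0; i < 100000; i++)
            g.add(std::unique_ptr<node>(new node(i)));
    }   // destructor frees all the nodes without a single allocation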
--- include/spead2/recv_chunk_stream.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index dabeba284..df9dc4a10 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -45,6 +45,7 @@ namespace recv class chunk { friend class chunk_stream_group; + template friend class chunk_ring_stream; private: /** * Reference count for chunks belonging to stream groups. @@ -54,6 +55,9 @@ class chunk */ std::size_t ref_count = 0; + /// Linked list of chunks to dispose of at shutdown + std::unique_ptr graveyard_next; + public: /// Chunk ID std::int64_t chunk_id = -1; @@ -541,15 +545,15 @@ class chunk_ring_stream : public chunk_stream private: std::shared_ptr data_ring; std::shared_ptr free_ring; - /// Temporary storage for in-flight chunks during @ref stop - std::vector> graveyard; + /// Temporary storage for linked list of in-flight chunks during @ref stop + std::unique_ptr graveyard; /// Create a new @ref spead2::recv::chunk_stream_config that uses the ringbuffers static chunk_stream_config adjust_chunk_config( const chunk_stream_config &chunk_config, DataRingbuffer &data_ring, FreeRingbuffer &free_ring, - std::vector> &graveyard); + std::unique_ptr &graveyard); public: /** @@ -750,8 +754,6 @@ chunk_ring_stream::chunk_ring_stream( free_ring(std::move(free_ring)) { this->data_ring->add_producer(); - // Ensure that we don't run out of memory during shutdown - graveyard.reserve(get_chunk_config().get_max_chunks()); } template @@ -759,7 +761,7 @@ chunk_stream_config chunk_ring_stream::adjust_ch const chunk_stream_config &chunk_config, DataRingbuffer &data_ring, FreeRingbuffer &free_ring, - std::vector> &graveyard) + std::unique_ptr &graveyard) { chunk_stream_config new_config = chunk_config; // Set the allocate callback to get a chunk from the free ringbuffer @@ -791,7 +793,9 @@ chunk_stream_config chunk_ring_stream::adjust_ch { // Suppress the error, move the chunk to the graveyard log_info("dropped chunk %d due to external stop", c->chunk_id); - graveyard.push_back(std::move(c)); + assert(!c->graveyard_next); // chunk should not already be in a linked list + c->graveyard_next = std::move(graveyard); + graveyard = std::move(c); } }); return new_config; @@ -829,7 +833,7 @@ void chunk_ring_stream::stop() chunk_stream::stop(); { std::lock_guard lock(shared->queue_mutex); - graveyard.clear(); // free chunks that didn't make it into data_ring + graveyard.reset(); // free chunks that didn't make it into data_ring } } From e647275def14fbbbba577e236ae10cfb5658c5f3 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Wed, 21 Jun 2023 17:46:55 +0200 Subject: [PATCH 10/74] Write a bunch more code for chunk_stream_ring_group It's all untested beyond the fact that it compiles. chunk_ring_pair was split out of chunk_ring_stream to reduce duplication. 
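
In outline, the split looks like the following compilable sketch (every name
here is an illustrative stand-in, not the real spead2 class): the pair of
ringbuffers, and the operations that only touch them, move into a common
base, and the single-stream wrapper and the new group wrapper each inherit
from that base alongside their existing one.

    #include <iostream>
    #include <memory>
    #include <utility>

    struct chunk_t { int id; };

    struct fake_ring                    // stands in for the real ringbuffer
    {
        const char *name;
        explicit fake_ring(const char *name) : name(name) {}
        void push(std::unique_ptr<chunk_t> c)
        {
            std::cout << name << " <- chunk " << c->id << '\n';
        }
    };

    class ring_pair                     // analogue of detail::chunk_ring_pair
    {
    protected:
        std::shared_ptr<fake_ring> data_ring{new fake_ring("data")};
        std::shared_ptr<fake_ring> free_ring{new fake_ring("free")};

    public:
        // Written once here rather than duplicated in each wrapper
        void add_free_chunk(std::unique_ptr<chunk_t> &&c)
        {
            free_ring->push(std::move(c));
        }
    };

    class stream_impl {};               // stands in for chunk_stream
    class group_impl {};                // stands in for chunk_stream_group

    class ring_stream : public ring_pair, public stream_impl {};
    class ring_group : public ring_pair, public group_impl {};

    int main()
    {
        ring_stream s;
        ring_group g;
        s.add_free_chunk(std::unique_ptr<chunk_t>(new chunk_t{1}));
        g.add_free_chunk(std::unique_ptr<chunk_t>(new chunk_t{2}));
    }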
--- include/spead2/recv_chunk_stream.h | 121 +++++++++++--------- include/spead2/recv_chunk_stream_group.h | 138 ++++++++++++++++++++++- src/recv_chunk_stream_group.cpp | 45 +++++++- 3 files changed, 245 insertions(+), 59 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index df9dc4a10..1e08506bf 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -521,6 +521,43 @@ class chunk_stream : private detail::chunk_stream_state>, + typename FreeRingbuffer = ringbuffer>> +class chunk_ring_pair +{ +protected: + const std::shared_ptr data_ring; + const std::shared_ptr free_ring; + /// Temporary stroage for linked list of in-flight chunks while stopping + std::unique_ptr graveyard; + + chunk_ring_pair(std::shared_ptr data_ring, std::shared_ptr free_ring); + +public: + /** + * Add a chunk to the free ringbuffer. This takes care of zeroing out + * the @ref spead2::recv::chunk::present array, and it will suppress the + * @ref spead2::ringbuffer_stopped error if the free ringbuffer has been + * stopped (in which case the argument will not have been moved from). + * + * If the free ring is full, it will throw @ref spead2::ringbuffer_full + * rather than blocking. The free ringbuffer should be constructed with + * enough slots that this does not happen. + */ + void add_free_chunk(std::unique_ptr &&c); + + /// Retrieve the data ringbuffer passed to the constructor + std::shared_ptr get_data_ringbuffer() const { return data_ring; } + /// Retrieve the free ringbuffer passed to the constructor + std::shared_ptr get_free_ringbuffer() const { return free_ring; } +}; + +} // namespace detail + /** * Wrapper around @ref chunk_stream that uses ringbuffers to manage chunks. * @@ -532,22 +569,12 @@ class chunk_stream : private detail::chunk_stream_state>, typename FreeRingbuffer = ringbuffer>> -class chunk_ring_stream : public chunk_stream +class chunk_ring_stream : public detail::chunk_ring_pair, public chunk_stream { private: - std::shared_ptr data_ring; - std::shared_ptr free_ring; - /// Temporary storage for linked list of in-flight chunks during @ref stop - std::unique_ptr graveyard; - /// Create a new @ref spead2::recv::chunk_stream_config that uses the ringbuffers static chunk_stream_config adjust_chunk_config( const chunk_stream_config &chunk_config, @@ -577,23 +604,6 @@ class chunk_ring_stream : public chunk_stream std::shared_ptr data_ring, std::shared_ptr free_ring); - /** - * Add a chunk to the free ringbuffer. This takes care of zeroing out - * the @ref spead2::recv::chunk::present array, and it will suppress the - * @ref spead2::ringbuffer_stopped error if the free ringbuffer has been - * stopped (in which case the argument will not have been moved from). - * - * If the free ring is full, it will throw @ref spead2::ringbuffer_full - * rather than blocking. The free ringbuffer should be constructed with - * enough slots that this does not happen. 
- */ - void add_free_chunk(std::unique_ptr &&c); - - /// Retrieve the data ringbuffer passed to the constructor - std::shared_ptr get_data_ringbuffer() const { return data_ring; } - /// Retrieve the free ringbuffer passed to the constructor - std::shared_ptr get_free_ringbuffer() const { return free_ring; } - virtual void stop_received() override; virtual void stop() override; virtual ~chunk_ring_stream(); @@ -737,6 +747,29 @@ chunk_stream_state::allocate(std::size_t size, const packet_header &packet) } } +template +chunk_ring_pair::chunk_ring_pair( + std::shared_ptr data_ring, + std::shared_ptr free_ring) + : data_ring(std::move(data_ring)), free_ring(std::move(free_ring)) +{ +} + +template +void chunk_ring_pair::add_free_chunk(std::unique_ptr &&c) +{ + // Mark all heaps as not yet present + std::memset(c->present.get(), 0, c->present_size); + try + { + free_ring->try_push(std::move(c)); + } + catch (spead2::ringbuffer_stopped &) + { + // Suppress the error + } +} + } // namespace detail template @@ -746,12 +779,11 @@ chunk_ring_stream::chunk_ring_stream( const chunk_stream_config &chunk_config, std::shared_ptr data_ring, std::shared_ptr free_ring) - : chunk_stream( + : detail::chunk_ring_pair(std::move(data_ring), std::move(free_ring)), + chunk_stream( io_service, config, - adjust_chunk_config(chunk_config, *data_ring, *free_ring, graveyard)), - data_ring(std::move(data_ring)), - free_ring(std::move(free_ring)) + adjust_chunk_config(chunk_config, *this->data_ring, *this->free_ring, this->graveyard)) { this->data_ring->add_producer(); } @@ -801,26 +833,11 @@ chunk_stream_config chunk_ring_stream::adjust_ch return new_config; } -template -void chunk_ring_stream::add_free_chunk(std::unique_ptr &&c) -{ - // Mark all heaps as not yet present - std::memset(c->present.get(), 0, c->present_size); - try - { - free_ring->try_push(std::move(c)); - } - catch (spead2::ringbuffer_stopped &) - { - // Suppress the error - } -} - template void chunk_ring_stream::stop_received() { chunk_stream::stop_received(); - data_ring->remove_producer(); + this->data_ring->remove_producer(); } template @@ -828,12 +845,14 @@ void chunk_ring_stream::stop() { // Stop the ringbuffers first, so that if the calling code is no longer // servicing them it will not lead to a deadlock as we flush. - free_ring->stop(); - data_ring->stop(); // NB: NOT remove_producer as that might not break a deadlock + this->free_ring->stop(); + this->data_ring->stop(); // NB: NOT remove_producer as that might not break a deadlock chunk_stream::stop(); { + // Locking is probably not needed, as all readers are terminated by + // chunk_stream::stop(). But it should be safe. std::lock_guard lock(shared->queue_mutex); - graveyard.reset(); // free chunks that didn't make it into data_ring + this->graveyard.reset(); // free chunks that didn't make it into data_ring } } diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 5d506652e..281ff6104 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -24,7 +24,9 @@ #include #include #include +#include #include +#include #include #include @@ -88,6 +90,8 @@ class chunk_manager_group } // namespace detail +class chunk_stream_group_member; + /** * A holder for a collection of streams that share chunks. 
* @@ -97,6 +101,7 @@ class chunk_stream_group { private: friend class detail::chunk_manager_group; + friend class chunk_stream_group_member; const chunk_stream_group_config config; @@ -112,6 +117,15 @@ class chunk_stream_group */ detail::chunk_window chunks; + /** + * References to the component streams that have not yet been stopped. + * + * Note that these are insufficient to actually keep the streams alive. + * The stream_stop_received callback ensures that we don't end up with + * dangling pointers. + */ + std::set streams; + /** * Obtain the chunk with a given ID. * @@ -136,14 +150,32 @@ class chunk_stream_group /// Version of release_chunk that does not take the lock void release_chunk_unlocked(chunk *c, std::uint64_t *batch_stats); + /// Called by newly-constructed streams + virtual void stream_added(chunk_stream_group_member &s); + /** + * Called when a stream stops (whether from the network or the user). + * + * The stream's @c queue_mutex is locked when this is called. + */ + virtual void stream_stop_received(chunk_stream_group_member &s); + /** + * Called when the user stops (or destroys) a stream. + * + * This is called before the caller actually stops the stream, and without + * the stream's @c queue_mutex. + */ + virtual void stream_pre_stop(chunk_stream_group_member &s) {} + public: chunk_stream_group(const chunk_stream_group_config &config); - ~chunk_stream_group(); + virtual ~chunk_stream_group(); /** - * Release all chunks. This function is thread-safe. + * Stop all streams and release all chunks. This function is mostly + * thread-safe, but it is unsafe to call it at the same time as a stream is + * being destroyed. */ - void flush_chunks(); + virtual void stop(); }; /** @@ -153,6 +185,9 @@ class chunk_stream_group_member : private detail::chunk_stream_state>, + typename FreeRingbuffer = ringbuffer>> +class chunk_stream_ring_group +: public detail::chunk_ring_pair, public chunk_stream_group +{ +private: + /// Create a new @ref chunk_stream_group_config that uses the ringbuffers + static chunk_stream_group_config adjust_group_config( + const chunk_stream_group_config &config, + DataRingbuffer &data_ring, + FreeRingbuffer &free_ring, + std::unique_ptr &graveyard); + + virtual void stream_added(chunk_stream_group_member &s) override; + virtual void stream_stop_received(chunk_stream_group_member &s) override; + virtual void stream_pre_stop(chunk_stream_group_member &s) override; + +public: + chunk_stream_ring_group( + const chunk_stream_group_config &group_config, + std::shared_ptr data_ring, + std::shared_ptr free_ring); + virtual void stop() override; + + ~chunk_stream_ring_group(); +}; + +template +chunk_stream_ring_group::chunk_stream_ring_group( + const chunk_stream_group_config &group_config, + std::shared_ptr data_ring, + std::shared_ptr free_ring) + : detail::chunk_ring_pair(std::move(data_ring), std::move(free_ring)), + chunk_stream_group(adjust_group_config(this->data_ring, this->free_ring, this->graveyard)) +{ +} + +template +void chunk_stream_ring_group::stream_added( + chunk_stream_group_member &s) +{ + chunk_stream_group::stream_added(s); + this->data_ring.add_producer(); +} + +template +void chunk_stream_ring_group::stream_stop_received( + chunk_stream_group_member &s) +{ + chunk_stream_group::stream_stop_received(s); + this->data_ring.remove_producer(); +} + +template +void chunk_stream_ring_group::stream_pre_stop( + chunk_stream_group_member &s) +{ + // Shut down the rings so that if the caller is no longer servicing them, it will + // not lead to a deadlock 
during shutdown. + this->data_ring.stop(); + this->free_ring.stop(); + chunk_stream_group::stream_pre_stop(s); +} + +template +void chunk_stream_ring_group::stop() +{ + // Stopping the first stream should do this anyway, but this ensures + // they're stopped even if there are no streams + this->data_ring.stop(); + this->free_ring.stop(); + chunk_stream_group::stop(); + this->graveyard.reset(); // Release chunks from the graveyard +} + +template +chunk_stream_ring_group::~chunk_stream_ring_group() +{ + stop(); +} + } // namespace recv } // namespace spead2 diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 2feaee90e..a913d9c28 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -18,6 +18,9 @@ * @file */ +#include +#include +#include #include #include @@ -80,12 +83,27 @@ chunk_stream_group::chunk_stream_group(const chunk_stream_group_config &config) chunk_stream_group::~chunk_stream_group() { - flush_chunks(); + stop(); } -void chunk_stream_group::flush_chunks() -{ - std::lock_guard lock(mutex); +void chunk_stream_group::stop() +{ + /* Streams will try to lock the group (and modify `streams`) while + * stopping, so we need to take a copy. During the destructor there + * should be no streams left, so this is not allocating memory during + * destruction. + * + * The copy is not protected by the mutex, so streams can asynchronously + * stop under us. That's okay because the contract for this function is + * that it's not allowed to occur concurrently with destroying streams. + */ + std::unique_lock lock(mutex); + std::vector streams_copy(streams.begin(), streams.end()); + lock.unlock(); + for (auto stream : streams_copy) + stream->stop(); + + lock.lock(); while (chunks.get_head_chunk() != chunks.get_tail_chunk()) chunks.flush_head([this](chunk *c) { release_chunk_unlocked(c, nullptr); }); } @@ -122,6 +140,19 @@ void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats) release_chunk_unlocked(c, batch_stats); } +void chunk_stream_group::stream_added(chunk_stream_group_member &s) +{ + std::lock_guard lock(mutex); + bool added = streams.insert(&s).second; + assert(added); // should be impossible to add the same stream twice +} + +void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s) +{ + std::lock_guard lock(mutex); + streams.erase(&s); +} + chunk_stream_group_member::chunk_stream_group_member( io_service_ref io_service, @@ -129,8 +160,10 @@ chunk_stream_group_member::chunk_stream_group_member( const chunk_stream_config &chunk_config, chunk_stream_group &group) : chunk_stream_state(config, chunk_config, detail::chunk_manager_group(group)), - stream(std::move(io_service), adjust_config(config)) + stream(std::move(io_service), adjust_config(config)), + group(group) { + group.stream_added(*this); } void chunk_stream_group_member::heap_ready(live_heap &&lh) @@ -141,11 +174,13 @@ void chunk_stream_group_member::heap_ready(live_heap &&lh) void chunk_stream_group_member::stop_received() { stream::stop_received(); + group.stream_stop_received(*this); flush_chunks(); } void chunk_stream_group_member::stop() { + group.stream_pre_stop(*this); { std::lock_guard lock(shared->queue_mutex); flush_chunks(); From c763ba12b0dfca48664ccc45ea5361c9ca9f56eb Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 09:22:34 +0200 Subject: [PATCH 11/74] Avoid allocating memory in chunk_stream_group::stop Move from `streams` instead of copying. 
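
The pattern in miniature (an illustrative sketch, not the actual code):
move-constructing a std::set steals its nodes in constant time, so no memory
is allocated no matter how many streams there are, and the member is left
empty so that streams erasing themselves while stopping see a consistent
container.

    #include <iostream>
    #include <mutex>
    #include <set>
    #include <utility>

    std::mutex mutex;
    std::set<int> streams{1, 2, 3};

    void stop_all()
    {
        std::unique_lock<std::mutex> lock(mutex);
        std::set<int> local = std::move(streams);   // O(1), allocation-free
        lock.unlock();       // don't hold the lock while running callbacks
        for (int s : local)
            std::cout << "stopping stream " << s << '\n';
    }

    int main()
    {
        stop_all();
    }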
---
 include/spead2/recv_chunk_stream_group.h | 7 +++----
 src/recv_chunk_stream_group.cpp | 15 +++++++--------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index 281ff6104..d08abc560 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -23,7 +23,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -171,9 +170,9 @@ class chunk_stream_group
     virtual ~chunk_stream_group();

     /**
-     * Stop all streams and release all chunks. This function is mostly
-     * thread-safe, but it is unsafe to call it at the same time as a stream is
-     * being destroyed.
+     * Stop all streams and release all chunks. This function must not be
+     * called concurrently with creating or destroying streams, and no
+     * new streams should be created after calling this.
      */
     virtual void stop();
 };
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index a913d9c28..553995e97 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -89,18 +89,17 @@ chunk_stream_group::~chunk_stream_group()
 void chunk_stream_group::stop()
 {
     /* Streams will try to lock the group (and modify `streams`) while
-     * stopping, so we need to take a copy. During the destructor there
-     * should be no streams left, so this is not allocating memory during
-     * destruction.
+     * stopping, so we move the streams set into a local variable.
      *
-     * The copy is not protected by the mutex, so streams can asynchronously
-     * stop under us. That's okay because the contract for this function is
-     * that it's not allowed to occur concurrently with destroying streams.
+     * The mutex is not held while stopping streams, so streams can
+     * asynchronously stop under us. That's okay because the contract for this
+     * function is that it's not allowed to occur concurrently with destroying
+     * streams.
      */
     std::unique_lock<std::mutex> lock(mutex);
-    std::vector<chunk_stream_group_member *> streams_copy(streams.begin(), streams.end());
+    auto streams_local = std::move(streams);
     lock.unlock();
-    for (auto stream : streams_copy)
+    for (auto stream : streams_local)
         stream->stop();

     lock.lock();

From cad9f0fb261c19b56a1d963f4eeb650f694e5c20 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 22 Jun 2023 09:43:30 +0200
Subject: [PATCH 12/74] Make some methods protected, not private

clang was (quite rightly, I believe) complaining about the derived class
implementations chaining to the base class when the base class methods were
private.
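
A minimal illustration of the language rule involved (generic names,
unrelated to spead2): overriding a private virtual function is allowed, but
a derived override that chains to the base implementation must be able to
name it, so the base member has to be at least protected.

    #include <iostream>

    class base
    {
    protected:          // with private: here, derived::f below is ill-formed
        virtual void f() { std::cout << "base::f\n"; }

    public:
        void run() { f(); }
        virtual ~base() = default;
    };

    class derived : public base
    {
    protected:
        void f() override
        {
            base::f();  // chaining requires access to the base member
            std::cout << "derived::f\n";
        }
    };

    int main()
    {
        derived d;
        d.run();        // prints base::f then derived::f
    }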
--- include/spead2/recv_chunk_stream_group.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index d08abc560..75d65a8dd 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -149,6 +149,7 @@ class chunk_stream_group /// Version of release_chunk that does not take the lock void release_chunk_unlocked(chunk *c, std::uint64_t *batch_stats); +protected: /// Called by newly-constructed streams virtual void stream_added(chunk_stream_group_member &s); /** From d627bbafdcb31415e599a7d62d25658f220dbe4f Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 09:53:05 +0200 Subject: [PATCH 13/74] Don't change methods from protected to private when overriding --- include/spead2/recv_chunk_stream_group.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 75d65a8dd..60e74bca8 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -256,6 +256,7 @@ class chunk_stream_ring_group FreeRingbuffer &free_ring, std::unique_ptr &graveyard); +protected: virtual void stream_added(chunk_stream_group_member &s) override; virtual void stream_stop_received(chunk_stream_group_member &s) override; virtual void stream_pre_stop(chunk_stream_group_member &s) override; From f8fe714cfd0910adedfe91f067216285cdeebc95 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 10:51:43 +0200 Subject: [PATCH 14/74] Start wiring up chunk_stream_group to Python This already exposed a bunch of compilation issues that appear just because templates are now being instantiated. chunk_ring_stream is extended to take on the responsibility for making the chunk allocate and ready functions, simplifying adjust_chunk_config and adjust_group_config. The Python wiring is incomplete, as it does not include chunk_stream_group_config. --- include/spead2/recv_chunk_stream.h | 101 ++++++++++++-------- include/spead2/recv_chunk_stream_group.h | 29 ++++-- src/py_recv.cpp | 115 +++++++++++++++-------- 3 files changed, 161 insertions(+), 84 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 1e08506bf..4b38bea4e 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -41,11 +41,18 @@ namespace spead2 namespace recv { +namespace detail +{ + +template class chunk_ring_pair; + +} // namespace detail + /// Storage for a chunk with metadata class chunk { friend class chunk_stream_group; - template friend class chunk_ring_stream; + template friend class detail::chunk_ring_pair; private: /** * Reference count for chunks belonging to stream groups. @@ -538,6 +545,15 @@ class chunk_ring_pair chunk_ring_pair(std::shared_ptr data_ring, std::shared_ptr free_ring); public: + /// Create an allocate function that obtains chunks from the free ring + chunk_allocate_function make_allocate(); + /** + * Create a ready function that pushes chunks to the data ring. + * + * The orig_ready function is called first. + */ + chunk_ready_function make_ready(const chunk_ready_function &orig_ready); + /** * Add a chunk to the free ringbuffer. 
This takes care of zeroing out * the @ref spead2::recv::chunk::present array, and it will suppress the @@ -578,9 +594,7 @@ class chunk_ring_stream : public detail::chunk_ring_pair &graveyard); + detail::chunk_ring_pair &ring_pair); public: /** @@ -770,34 +784,11 @@ void chunk_ring_pair::add_free_chunk(std::unique } } -} // namespace detail - template -chunk_ring_stream::chunk_ring_stream( - io_service_ref io_service, - const stream_config &config, - const chunk_stream_config &chunk_config, - std::shared_ptr data_ring, - std::shared_ptr free_ring) - : detail::chunk_ring_pair(std::move(data_ring), std::move(free_ring)), - chunk_stream( - io_service, - config, - adjust_chunk_config(chunk_config, *this->data_ring, *this->free_ring, this->graveyard)) -{ - this->data_ring->add_producer(); -} - -template -chunk_stream_config chunk_ring_stream::adjust_chunk_config( - const chunk_stream_config &chunk_config, - DataRingbuffer &data_ring, - FreeRingbuffer &free_ring, - std::unique_ptr &graveyard) +chunk_allocate_function chunk_ring_pair::make_allocate() { - chunk_stream_config new_config = chunk_config; - // Set the allocate callback to get a chunk from the free ringbuffer - new_config.set_allocate([&free_ring] (std::int64_t, std::uint64_t *) -> std::unique_ptr { + FreeRingbuffer &free_ring = *this->free_ring; + return [&free_ring] (std::int64_t, std::uint64_t *) -> std::unique_ptr { try { return free_ring.pop(); @@ -808,12 +799,17 @@ chunk_stream_config chunk_ring_stream::adjust_ch // ignore this chunk return nullptr; } - }); - // Set the ready callback to push chunks to the data ringbuffer - auto orig_ready = chunk_config.get_ready(); - new_config.set_ready( - [&data_ring, &graveyard, orig_ready] (std::unique_ptr &&c, - std::uint64_t *batch_stats) { + }; +} + +template +chunk_ready_function chunk_ring_pair::make_ready( + const chunk_ready_function &orig_ready) +{ + DataRingbuffer &data_ring = *this->data_ring; + std::unique_ptr &graveyard = this->graveyard; + return [&data_ring, &graveyard, orig_ready] (std::unique_ptr &&c, + std::uint64_t *batch_stats) { try { if (orig_ready) @@ -829,7 +825,38 @@ chunk_stream_config chunk_ring_stream::adjust_ch c->graveyard_next = std::move(graveyard); graveyard = std::move(c); } - }); + }; +} + +} // namespace detail + +template +chunk_ring_stream::chunk_ring_stream( + io_service_ref io_service, + const stream_config &config, + const chunk_stream_config &chunk_config, + std::shared_ptr data_ring, + std::shared_ptr free_ring) + : detail::chunk_ring_pair(std::move(data_ring), std::move(free_ring)), + chunk_stream( + io_service, + config, + adjust_chunk_config(chunk_config, *this)) +{ + this->data_ring->add_producer(); +} + +template +chunk_stream_config chunk_ring_stream::adjust_chunk_config( + const chunk_stream_config &chunk_config, + detail::chunk_ring_pair &ring_pair) +{ + chunk_stream_config new_config = chunk_config; + // Set the allocate callback to get a chunk from the free ringbuffer + new_config.set_allocate(ring_pair.make_allocate()); + // Set the ready callback to push chunks to the data ringbuffer + auto orig_ready = chunk_config.get_ready(); + new_config.set_ready(ring_pair.make_ready(chunk_config.get_ready())); return new_config; } diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 60e74bca8..73767ae48 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -252,9 +252,7 @@ class chunk_stream_ring_group /// Create a new @ref chunk_stream_group_config 
that uses the ringbuffers static chunk_stream_group_config adjust_group_config( const chunk_stream_group_config &config, - DataRingbuffer &data_ring, - FreeRingbuffer &free_ring, - std::unique_ptr &graveyard); + detail::chunk_ring_pair &ring_pair); protected: virtual void stream_added(chunk_stream_group_member &s) override; @@ -277,16 +275,27 @@ chunk_stream_ring_group::chunk_stream_ring_group std::shared_ptr data_ring, std::shared_ptr free_ring) : detail::chunk_ring_pair(std::move(data_ring), std::move(free_ring)), - chunk_stream_group(adjust_group_config(this->data_ring, this->free_ring, this->graveyard)) + chunk_stream_group(adjust_group_config(group_config, *this)) { } +template +chunk_stream_group_config chunk_stream_ring_group::adjust_group_config( + const chunk_stream_group_config &config, + detail::chunk_ring_pair &ring_pair) +{ + chunk_stream_group_config new_config = config; + new_config.set_allocate(ring_pair.make_allocate()); + new_config.set_ready(ring_pair.make_ready(config.get_ready())); + return new_config; +} + template void chunk_stream_ring_group::stream_added( chunk_stream_group_member &s) { chunk_stream_group::stream_added(s); - this->data_ring.add_producer(); + this->data_ring->add_producer(); } template @@ -294,7 +303,7 @@ void chunk_stream_ring_group::stream_stop_receiv chunk_stream_group_member &s) { chunk_stream_group::stream_stop_received(s); - this->data_ring.remove_producer(); + this->data_ring->remove_producer(); } template @@ -303,8 +312,8 @@ void chunk_stream_ring_group::stream_pre_stop( { // Shut down the rings so that if the caller is no longer servicing them, it will // not lead to a deadlock during shutdown. - this->data_ring.stop(); - this->free_ring.stop(); + this->data_ring->stop(); + this->free_ring->stop(); chunk_stream_group::stream_pre_stop(s); } @@ -313,8 +322,8 @@ void chunk_stream_ring_group::stop() { // Stopping the first stream should do this anyway, but this ensures // they're stopped even if there are no streams - this->data_ring.stop(); - this->free_ring.stop(); + this->data_ring->stop(); + this->free_ring->stop(); chunk_stream_group::stop(); this->graveyard.reset(); // Release chunks from the graveyard } diff --git a/src/py_recv.cpp b/src/py_recv.cpp index a4f92e1cb..1ffb389d7 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -1,4 +1,4 @@ -/* Copyright 2015, 2017, 2020-2022 National Research Foundation (SARAO) +/* Copyright 2015, 2017, 2020-2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -465,28 +466,37 @@ static void push_chunk(T func, chunk &c) typedef ringbuffer, semaphore_fd, semaphore_fd> chunk_ringbuffer; -class chunk_ring_stream_wrapper : public chunk_ring_stream -{ -private: - exit_stopper stopper{[this] { stop(); }}; +/* Note: ring_stream_wrapper drops the GIL while stopping. We + * can't do that here because stop() can free chunks that were + * in flight, which involves interaction with the Python API. + * I think the only reason ring_stream_wrapper drops the GIL is + * that logging used to directly acquire the GIL, and so if stop() + * did any logging it would deadlock. Now that logging is pushed + * off to a separate thread that should no longer be an issue. 
+ */ +#define EXIT_STOPPER_WRAPPER(cls, base) \ + class cls : public base \ + { \ + private: \ + exit_stopper stopper{[this] { stop(); }}; \ + public: \ + using base::base; \ + virtual void stop() override \ + { \ + stopper.reset(); \ + base::stop(); \ + } \ + } -public: - using chunk_ring_stream::chunk_ring_stream; +// These aliases are needed because a type passed to a macro cannot contain a comma +using chunk_ring_stream_orig = chunk_ring_stream; +using chunk_stream_ring_group_orig = chunk_stream_ring_group; - virtual void stop() override - { - stopper.reset(); - /* Note: ring_stream_wrapper drops the GIL while stopping. We - * can't do that here because stop() can free chunks that were - * in flight, which involves interaction with the Python API. - * I think the only reason ring_stream_wrapper drops the GIL is - * that logging used to directly acquire the GIL, and so if stop() - * did any logging it would deadlock. Now that logging is pushed - * off to a separate thread that should no longer be an issue. - */ - chunk_ring_stream::stop(); - } -}; +EXIT_STOPPER_WRAPPER(chunk_ring_stream_wrapper, chunk_ring_stream_orig); +EXIT_STOPPER_WRAPPER(chunk_stream_ring_group_wrapper, chunk_stream_ring_group_orig); +EXIT_STOPPER_WRAPPER(chunk_stream_group_member_wrapper, chunk_stream_group_member); + +#undef EXIT_STOPPER_WRAPPER /// Register the receiver module with Python py::module register_module(py::module &parent) @@ -882,22 +892,9 @@ py::module register_module(py::module &parent) "extra", [](const chunk &c) -> const memory_allocator::pointer & { return c.extra; }, [](chunk &c, memory_allocator::pointer &&value) { c.extra = std::move(value); }); - py::class_(m, "ChunkRingStream") - .def(py::init, - const stream_config &, - const chunk_stream_config &, - std::shared_ptr, - std::shared_ptr>(), - "thread_pool"_a.none(false), - "config"_a = stream_config(), - "chunk_stream_config"_a, - "data_ringbuffer"_a.none(false), - "free_ringbuffer"_a.none(false), - // Keep the Python ringbuffer objects alive, not just the C++ side. - // This allows Python subclasses to be passed then later retrieved - // from properties. - py::keep_alive<1, 5>(), - py::keep_alive<1, 6>()) + // Don't allow ChunkRingPair to be constructed from Python. It exists + // purely to be a base class. + py::class_>(m, "ChunkRingPair") .def( "add_free_chunk", [](chunk_ring_stream_wrapper &stream, chunk &c) @@ -913,6 +910,25 @@ py::module register_module(py::module &parent) "chunk"_a) .def_property_readonly("data_ringbuffer", SPEAD2_PTMF(chunk_ring_stream_wrapper, get_data_ringbuffer)) .def_property_readonly("free_ringbuffer", SPEAD2_PTMF(chunk_ring_stream_wrapper, get_free_ringbuffer)); + + py::class_, + stream>(m, "ChunkRingStream") + .def(py::init, + const stream_config &, + const chunk_stream_config &, + std::shared_ptr, + std::shared_ptr>(), + "thread_pool"_a.none(false), + "config"_a = stream_config(), + "chunk_stream_config"_a, + "data_ringbuffer"_a.none(false), + "free_ringbuffer"_a.none(false), + // Keep the Python ringbuffer objects alive, not just the C++ side. + // This allows Python subclasses to be passed then later retrieved + // from properties. 
+ py::keep_alive<1, 5>(), + py::keep_alive<1, 6>()); py::class_>(m, "ChunkRingbuffer") .def(py::init(), "maxsize"_a) .def("qsize", SPEAD2_PTMF(chunk_ringbuffer, size)) @@ -967,6 +983,31 @@ py::module register_module(py::module &parent) } }); + py::class_>(m, "ChunkStreamRingGroup") + .def(py::init, + std::shared_ptr>(), + "group_config"_a, + "data_ringbuffer"_a.none(false), + "free_ringbuffer"_a.none(false), + // Keep the Python ringbuffer objects alive, not just the C++ side. + // This allows Python subclasses to be passed then later retrieved + // from properties. + py::keep_alive<1, 2>(), + py::keep_alive<1, 3>()) + .def("stop", SPEAD2_PTMF(chunk_stream_ring_group_wrapper, stop)); + py::class_(m, "ChunkStreamGroupMember") + .def(py::init, + const stream_config &, + const chunk_stream_config &, + chunk_stream_ring_group_wrapper &>(), + "thread_pool"_a.none(false), + "config"_a = stream_config(), + "chunk_stream_config"_a, + "group"_a, + py::keep_alive<1, 4>()); // Keep the group alive + return m; } From c463a8e08240429bd8341f3fa63d3c85e6058639 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 10:54:51 +0200 Subject: [PATCH 15/74] Suppress a warning in release builds --- src/recv_chunk_stream_group.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 553995e97..91f29f6a8 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -144,6 +144,7 @@ void chunk_stream_group::stream_added(chunk_stream_group_member &s) std::lock_guard lock(mutex); bool added = streams.insert(&s).second; assert(added); // should be impossible to add the same stream twice + (void) added; // suppress warning when NDEBUG is defined } void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s) From cfb915f7ed8595a1f1b67605d71e049dccb82f9d Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 12:56:55 +0200 Subject: [PATCH 16/74] Add Python wrapper for chunk_stream_group_config --- src/py_recv.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/py_recv.cpp b/src/py_recv.cpp index 1ffb389d7..df28f218e 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -983,6 +983,12 @@ py::module register_module(py::module &parent) } }); + py::class_(m, "ChunkStreamGroupConfig") + .def(py::init(&data_class_constructor)) + .def_property("max_chunks", + SPEAD2_PTMF(chunk_stream_group_config, get_max_chunks), + SPEAD2_PTMF(chunk_stream_group_config, set_max_chunks)); + py::class_>(m, "ChunkStreamRingGroup") .def(py::init Date: Thu, 22 Jun 2023 13:22:29 +0200 Subject: [PATCH 17/74] More Python wiring/fixups for chunk_stream_group --- src/py_recv.cpp | 3 ++- src/recv_chunk_stream_group.cpp | 2 ++ src/spead2/recv/__init__.py | 17 ++++++++++++++--- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/py_recv.cpp b/src/py_recv.cpp index df28f218e..578976bfa 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -987,7 +987,8 @@ py::module register_module(py::module &parent) .def(py::init(&data_class_constructor)) .def_property("max_chunks", SPEAD2_PTMF(chunk_stream_group_config, get_max_chunks), - SPEAD2_PTMF(chunk_stream_group_config, set_max_chunks)); + SPEAD2_PTMF(chunk_stream_group_config, set_max_chunks)) + .def_readonly_static("DEFAULT_MAX_CHUNKS", &chunk_stream_group_config::default_max_chunks); py::class_>(m, "ChunkStreamRingGroup") diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 91f29f6a8..418e77538 100644 --- 
a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -29,6 +29,8 @@ namespace spead2 namespace recv { +constexpr std::size_t chunk_stream_group_config::default_max_chunks; + chunk_stream_group_config &chunk_stream_group_config::set_max_chunks(std::size_t max_chunks) { if (max_chunks == 0) diff --git a/src/spead2/recv/__init__.py b/src/spead2/recv/__init__.py index e785bd868..fdd2ef86f 100644 --- a/src/spead2/recv/__init__.py +++ b/src/spead2/recv/__init__.py @@ -55,9 +55,20 @@ """ from spead2._spead2.recv import ( # noqa: F401 - StreamConfig, RingStreamConfig, Stream, Heap, IncompleteHeap, - Chunk, ChunkStreamConfig, ChunkRingStream, ChunkRingbuffer, - StreamStats, StreamStatConfig + Chunk, + ChunkRingStream, + ChunkRingbuffer, + ChunkStreamConfig, + ChunkStreamGroupConfig, + ChunkStreamGroupMember, + ChunkStreamRingGroup, + Heap, + IncompleteHeap, + RingStreamConfig, + Stream, + StreamConfig, + StreamStatConfig, + StreamStats, ) from . import stream_stat_indices # noqa: F401 try: From c1b3c99b3f07c1cb617ecfcb8d4b0effed4d7762 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 14:30:52 +0200 Subject: [PATCH 18/74] Fix mismatch of ::operator new / operator delete[] This was picked up by valgrind. --- src/recv_chunk_stream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp index 42a7bee0d..c79586659 100644 --- a/src/recv_chunk_stream.cpp +++ b/src/recv_chunk_stream.cpp @@ -168,7 +168,7 @@ void chunk_stream_state_base::free_place_data::operator()(unsigned char *ptr) co // TODO: should this use std::launder in C++17? auto *place_data = reinterpret_cast(ptr); place_data->~chunk_place_data(); - operator delete[](ptr); + operator delete(ptr); } void chunk_stream_state_base::packet_memcpy( From 5115ca943c6916e50bd47ec7434fcc286333cacc Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 14:31:13 +0200 Subject: [PATCH 19/74] Fix incorrect indexing in some py::keep_alive call This affected the chunk_stream_group feature. --- src/py_recv.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/py_recv.cpp b/src/py_recv.cpp index 578976bfa..f5ec5f060 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -1001,8 +1001,8 @@ py::module register_module(py::module &parent) // Keep the Python ringbuffer objects alive, not just the C++ side. // This allows Python subclasses to be passed then later retrieved // from properties. 
- py::keep_alive<1, 2>(), - py::keep_alive<1, 3>()) + py::keep_alive<1, 3>(), + py::keep_alive<1, 4>()) .def("stop", SPEAD2_PTMF(chunk_stream_ring_group_wrapper, stop)); py::class_(m, "ChunkStreamGroupMember") .def(py::init, @@ -1013,7 +1013,7 @@ py::module register_module(py::module &parent) "config"_a = stream_config(), "chunk_stream_config"_a, "group"_a, - py::keep_alive<1, 4>()); // Keep the group alive + py::keep_alive<1, 5>()); // Keep the group alive return m; } From b92c23775b5031264e466c72647f5fc9110f1467 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 14:46:20 +0200 Subject: [PATCH 20/74] Fix a typo in an error message --- src/common_thread_pool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common_thread_pool.cpp b/src/common_thread_pool.cpp index 215ce0aa5..fb24a1b51 100644 --- a/src/common_thread_pool.cpp +++ b/src/common_thread_pool.cpp @@ -124,7 +124,7 @@ void thread_pool::stop() } catch (std::exception &e) { - log_warning("worker thread throw an exception: %s", e.what()); + log_warning("worker thread threw an exception: %s", e.what()); } } workers.clear(); From 2ba597840306b2a7c4346f88836e2db77707dbfe Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 15:14:46 +0200 Subject: [PATCH 21/74] Make chunk_stream_group constructor explicit --- include/spead2/recv_chunk_stream_group.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 73767ae48..56c5e4d21 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -167,7 +167,7 @@ class chunk_stream_group virtual void stream_pre_stop(chunk_stream_group_member &s) {} public: - chunk_stream_group(const chunk_stream_group_config &config); + explicit chunk_stream_group(const chunk_stream_group_config &config); virtual ~chunk_stream_group(); /** From aa1f03ee3b5c34ded67b39fc3f535297227e6858 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 15:14:58 +0200 Subject: [PATCH 22/74] Fix constructor for chunk_stream_group --- src/recv_chunk_stream_group.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 418e77538..00799bbd8 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -79,7 +79,7 @@ void chunk_manager_group::ready_chunk(chunk_stream_state &s } // namespace detail chunk_stream_group::chunk_stream_group(const chunk_stream_group_config &config) - : chunks(config.get_max_chunks()) + : config(config), chunks(config.get_max_chunks()) { } From 672b8f5691cf79ba1e447a3fe42fc136687942a7 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 15:21:55 +0200 Subject: [PATCH 23/74] Fix incorrect locking in chunk_stream_group release_chunk_unlocked was nevertheless locking, leading to a self-deadlock. 
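
The convention at stake, in a self-contained sketch (hypothetical names): a
non-recursive std::mutex may only be locked once per call path, so the
public method takes the lock and the `_unlocked` variant assumes the caller
already holds it; if the helper locks again, the thread deadlocks against
itself.

    #include <iostream>
    #include <mutex>

    class group
    {
    private:
        std::mutex mutex;
        int released = 0;

        // Precondition: the caller holds `mutex`; must not lock it again.
        void release_unlocked() { ++released; }

    public:
        void release()
        {
            std::lock_guard<std::mutex> lock(mutex);
            release_unlocked();   // exactly one level of locking
        }
    };

    int main()
    {
        group g;
        g.release();
        std::cout << "released without deadlock\n";
    }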
--- src/recv_chunk_stream_group.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 00799bbd8..c6582f9b1 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -127,7 +127,6 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea void chunk_stream_group::release_chunk_unlocked(chunk *c, std::uint64_t *batch_stats) { - std::lock_guard lock(mutex); if (--c->ref_count == 0) { std::unique_ptr owned(c); From cf76a8e6dada7b9b05867c7ba879099994899169 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 22 Jun 2023 15:27:29 +0200 Subject: [PATCH 24/74] Make Python chunk_stream_group.add_free_chunk work add_free_chunk was moved to a chunk_ring_pair base class, but the signature for the lambda was not updated properly. --- src/py_recv.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/py_recv.cpp b/src/py_recv.cpp index f5ec5f060..0d013c6a9 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -897,12 +897,12 @@ py::module register_module(py::module &parent) py::class_>(m, "ChunkRingPair") .def( "add_free_chunk", - [](chunk_ring_stream_wrapper &stream, chunk &c) + [](detail::chunk_ring_pair &self, chunk &c) { push_chunk( - [&stream](std::unique_ptr &&wrapper) + [&self](std::unique_ptr &&wrapper) { - stream.add_free_chunk(std::move(wrapper)); + self.add_free_chunk(std::move(wrapper)); }, c ); From d15baeef447bd6b6f0002f3b1364a666bbd10b80 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 10:04:19 +0200 Subject: [PATCH 25/74] Another redesign of chunk_stream_group synchronisation Now there is one true window and when something falls off the back, we wait for all the streams to get there, but using a condition variable (and dropping the group mutex) to avoid deadlocks. --- include/spead2/recv_chunk_stream.h | 19 ++++++ include/spead2/recv_chunk_stream_group.h | 24 ++++++-- src/recv_chunk_stream_group.cpp | 77 ++++++++++++++++++++---- 3 files changed, 101 insertions(+), 19 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 4b38bea4e..5200451bb 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -249,6 +249,25 @@ class chunk_window explicit chunk_window(std::size_t max_chunks); + /** + * Obtain a pointer to a chunk with ID @a chunk_id. + * + * If @a chunk_id falls outside the window, returns nullptr. + */ + chunk *get_chunk(std::int64_t chunk_id) const + { + if (chunk_id >= head_chunk && chunk_id < tail_chunk) + { + std::size_t pos = chunk_id - head_chunk + head_pos; + const std::size_t max_chunks = chunks.size(); + if (pos >= max_chunks) + pos -= max_chunks; // wrap around the circular storage + return chunks[pos]; + } + else + return nullptr; + } + /** * Obtain a pointer to a chunk with ID @a chunk_id. 
* diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 56c5e4d21..9de2ff441 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -24,6 +24,7 @@ #include #include #include +#include <condition_variable> #include #include #include @@ -104,7 +105,9 @@ class chunk_stream_group const chunk_stream_group_config config; - std::mutex mutex; // Protects all the mutable state + std::mutex mutex; ///< Protects all the mutable state + /// Notified when the reference count of a chunk reaches zero + std::condition_variable ready_condition; /** * Circular buffer of chunks under construction. @@ -139,15 +142,15 @@ class chunk_stream_group /** * Decrement chunk reference count. * - * If the reference count reaches zero, the chunk is passed to the ready - * callback. + * If the reference count reaches zero, the chunk is valid to pass to + * the ready callback. * * This function is thread-safe. */ - void release_chunk(chunk *c, std::uint64_t *batch_stats); + void release_chunk(chunk *c); - /// Version of release_chunk that does not take the lock - void release_chunk_unlocked(chunk *c, std::uint64_t *batch_stats); + /// Pass a chunk to the user-provided ready function + void ready_chunk(chunk *c, std::uint64_t *batch_stats); protected: /// Called by newly-constructed streams @@ -184,12 +187,21 @@ class chunk_stream_group class chunk_stream_group_member : private detail::chunk_stream_state, public stream { friend class detail::chunk_manager_group; + friend class chunk_stream_group; private: chunk_stream_group &group; // TODO: redundant - also stored inside the manager virtual void heap_ready(live_heap &&) override; + /** + * Flush all chunks with an ID strictly less than @a chunk_id. + * + * This function returns immediately, and the work is done later on the + * io_service. It is safe to call from any thread. + */ + void async_flush_until(std::int64_t chunk_id); + public: using heap_metadata = detail::chunk_stream_state_base::heap_metadata; diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index c6582f9b1..dde3a7626 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -72,8 +72,7 @@ chunk *chunk_manager_group::allocate_chunk( void chunk_manager_group::ready_chunk(chunk_stream_state &state, chunk *c) { - std::uint64_t *batch_stats = static_cast(&state)->batch_stats.data(); - group.release_chunk(c, batch_stats); + group.release_chunk(c); } } // namespace detail @@ -106,38 +105,72 @@ void chunk_stream_group::stop() lock.lock(); while (chunks.get_head_chunk() != chunks.get_tail_chunk()) - chunks.flush_head([this](chunk *c) { release_chunk_unlocked(c, nullptr); }); + chunks.flush_head([this](chunk *c) { ready_chunk(c, nullptr); }); } chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats) { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); + /* Any chunk old enough to be made ready needs to first be released by the + * member streams. To do that, we request all the streams to flush, then + * wait until it is safe, using the condition variable to wake up + * whenever there is forward progress. + * + * Another thread may call get_chunk in the meantime and advance the + * window, so we must be careful not to assume anything about the + * state after a wait.
+ */ + const std::size_t max_chunks = config.get_max_chunks(); + if (chunk_id >= chunks.get_head_chunk() + max_chunks) + { + std::int64_t target = chunk_id - max_chunks + 1; // first chunk we don't need to flush + for (chunk_stream_group_member *s : streams) + s->async_flush_until(target); + std::int64_t to_check = chunks.get_head_chunk(); // next chunk to wait for + while (true) + { + bool good = true; + std::int64_t limit = std::min(chunks.get_tail_chunk(), target); + to_check = std::max(chunks.get_head_chunk(), to_check); + for (; to_check < limit; to_check++) + { + chunk *c = chunks.get_chunk(to_check); + if (c && c->ref_count > 0) + { + good = false; // Still need to wait longer for this chunk + break; + } + } + if (good) + break; + ready_condition.wait(lock); + } + } + chunk *c = chunks.get_chunk( chunk_id, stream_id, [this, batch_stats](std::int64_t id) { return config.get_allocate()(id, batch_stats).release(); }, - [this, batch_stats](chunk *c) { release_chunk_unlocked(c, batch_stats); } + [this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); } ); if (c) c->ref_count++; return c; } -void chunk_stream_group::release_chunk_unlocked(chunk *c, std::uint64_t *batch_stats) +void chunk_stream_group::ready_chunk(chunk *c, std::uint64_t *batch_stats) { - if (--c->ref_count == 0) - { - std::unique_ptr owned(c); - config.get_ready()(std::move(owned), batch_stats); - } + std::unique_ptr owned(c); + config.get_ready()(std::move(owned), batch_stats); } -void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats) +void chunk_stream_group::release_chunk(chunk *c) { std::lock_guard lock(mutex); - release_chunk_unlocked(c, batch_stats); + if (--c->ref_count == 0) + ready_condition.notify_all(); } void chunk_stream_group::stream_added(chunk_stream_group_member &s) @@ -172,6 +205,24 @@ void chunk_stream_group_member::heap_ready(live_heap &&lh) do_heap_ready(std::move(lh)); } +void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id) +{ + std::shared_ptr shared = this->shared; + // TODO: once we depend on C++14, move rather than copying into the lambda + boost::asio::post(get_io_service(), [shared, chunk_id]() { + std::lock_guard lock(shared->queue_mutex); + if (!shared->self) + return; // We've stopped, which means everything is flushed + chunk_stream_group_member *self = static_cast(shared->self); + while (self->chunks.get_head_chunk() < chunk_id) + { + self->chunks.flush_head([self](chunk *c) { + self->group.release_chunk(c); + }); + } + }); +} + void chunk_stream_group_member::stop_received() { stream::stop_received(); From eca67b763400b88220f9cef8244e5a8398dda2b1 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 10:39:38 +0200 Subject: [PATCH 26/74] Prevent stream's max_chunks from exceeding the group's This would potentially lead to deadlocks. 
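For context on why the limit matters: the group can only retire the chunk at the head of its window once every member stream has released its reference, and a member only releases a chunk when that chunk falls out of the member's own window. If a member's window were larger than the group's, the member could legitimately still be writing to a chunk that the group needs to evict, and the wait in chunk_stream_group::get_chunk would never finish. A hedged configuration sketch (illustrative values only; both setters exist in the API shown in this series):

    #include <spead2/recv_chunk_stream_group.h>

    spead2::recv::chunk_stream_group_config group_config;
    group_config.set_max_chunks(8);           // group window: 8 chunks

    spead2::recv::chunk_stream_config stream_chunk_config;
    stream_chunk_config.set_max_chunks(8);    // OK: must not exceed the group's 8
    // set_max_chunks(16) would instead make the member stream's constructor
    // throw std::invalid_argument once the check below is in place.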
--- src/recv_chunk_stream_group.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index dde3a7626..c577d0403 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -197,6 +197,8 @@ chunk_stream_group_member::chunk_stream_group_member( stream(std::move(io_service), adjust_config(config)), group(group) { + if (chunk_config.get_max_chunks() > group.config.get_max_chunks()) + throw std::invalid_argument("stream max_chunks must not be larger than group max_chunks"); group.stream_added(*this); } From 108a4bf9ef08135bcf8c0a83f6bc109ed5cfdd34 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 11:34:54 +0200 Subject: [PATCH 27/74] Ensure that stopping group member streams doesn't lose chunks When a stream flushes a chunk, also flush from the group as far as possible. --- include/spead2/recv_chunk_stream_group.h | 2 +- src/recv_chunk_stream_group.cpp | 26 +++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 9de2ff441..1a25e54be 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -147,7 +147,7 @@ class chunk_stream_group * * This function is thread-safe. */ - void release_chunk(chunk *c); + void release_chunk(chunk *c, std::uint64_t *batch_stats); /// Pass a chunk to the user-provided ready function void ready_chunk(chunk *c, std::uint64_t *batch_stats); diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index c577d0403..5dde8db27 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -72,7 +72,8 @@ chunk *chunk_manager_group::allocate_chunk( void chunk_manager_group::ready_chunk(chunk_stream_state &state, chunk *c) { - group.release_chunk(c); + std::uint64_t *batch_stats = static_cast(&state)->batch_stats.data(); + group.release_chunk(c, batch_stats); } } // namespace detail @@ -121,7 +122,7 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea * state after a wait. */ const std::size_t max_chunks = config.get_max_chunks(); - if (chunk_id >= chunks.get_head_chunk() + max_chunks) + if (chunk_id >= chunks.get_head_chunk() + std::int64_t(max_chunks)) { std::int64_t target = chunk_id - max_chunks + 1; // first chunk we don't need to flush for (chunk_stream_group_member *s : streams) @@ -166,11 +167,26 @@ void chunk_stream_group::ready_chunk(chunk *c, std::uint64_t *batch_stats) config.get_ready()(std::move(owned), batch_stats); } -void chunk_stream_group::release_chunk(chunk *c) +void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats) { std::lock_guard lock(mutex); if (--c->ref_count == 0) + { + /* Proactively flush chunks that have been fully released. + * This ensures that if the member stream is stopping, we + * have a chance to make the chunks ready before we shut + * everything down. 
+ */ + while (chunks.get_head_chunk() != chunks.get_tail_chunk()) + { + chunk *c = chunks.get_chunk(chunks.get_head_chunk()); + if (c && c->ref_count == 0) + chunks.flush_head([this, batch_stats](chunk *c2) { ready_chunk(c2, batch_stats); }); + else + break; + } ready_condition.notify_all(); + } } void chunk_stream_group::stream_added(chunk_stream_group_member &s) @@ -219,7 +235,7 @@ void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id) while (self->chunks.get_head_chunk() < chunk_id) { self->chunks.flush_head([self](chunk *c) { - self->group.release_chunk(c); + self->group.release_chunk(c, self->batch_stats.data()); }); } }); @@ -228,8 +244,8 @@ void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id) void chunk_stream_group_member::stop_received() { stream::stop_received(); - group.stream_stop_received(*this); flush_chunks(); + group.stream_stop_received(*this); } void chunk_stream_group_member::stop() From f91d9462a8c0486fe922975325a8a0b631902117 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 13:00:27 +0200 Subject: [PATCH 28/74] Add initial unit tests for chunk_stream_group --- tests/test_recv_chunk_stream_group.py | 148 ++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 tests/test_recv_chunk_stream_group.py diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py new file mode 100644 index 000000000..1386b0571 --- /dev/null +++ b/tests/test_recv_chunk_stream_group.py @@ -0,0 +1,148 @@ +# Copyright 2023 National Research Foundation (SARAO) +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
+ +import threading +import time + +import numpy as np +import pytest + +import spead2 +import spead2.recv as recv +import spead2.send as send + +from tests.test_recv_chunk_stream import ( + CHUNK_PAYLOAD_SIZE, HEAP_PAYLOAD_SIZE, HEAPS_PER_CHUNK, place_plain_llc +) + +STREAMS = 4 + + +class TestChunkStreamGroupConfig: + def test_default_construct(self): + config = recv.ChunkStreamGroupConfig() + assert config.max_chunks == config.DEFAULT_MAX_CHUNKS + + def test_zero_max_chunks(self): + with pytest.raises(ValueError): + recv.ChunkStreamGroupConfig(max_chunks=0) + + def test_max_chunks(self): + config = recv.ChunkStreamGroupConfig(max_chunks=3) + assert config.max_chunks == 3 + config.max_chunks = 4 + assert config.max_chunks == 4 + + +class TestChunkStreamRingGroup: + @pytest.fixture + def data_ring(self): + return spead2.recv.ChunkRingbuffer(4) + + @pytest.fixture + def free_ring(self): + ring = spead2.recv.ChunkRingbuffer(4) + while not ring.full(): + ring.put( + recv.Chunk( + present=np.zeros(HEAPS_PER_CHUNK, np.uint8), + data=np.zeros(CHUNK_PAYLOAD_SIZE, np.uint8) + ) + ) + return ring + + @pytest.fixture + def queues(self): + return [spead2.InprocQueue() for _ in range(STREAMS)] + + @pytest.fixture + def group(self, data_ring, free_ring): + group_config = recv.ChunkStreamGroupConfig(max_chunks=4) + group = recv.ChunkStreamRingGroup(group_config, data_ring, free_ring) + yield group + group.stop() + + @pytest.fixture + def recv_streams(self, queues, group): + # max_heaps is artificially high to make test_packet_too_old work + config = spead2.recv.StreamConfig(max_heaps=128) + chunk_stream_config = spead2.recv.ChunkStreamConfig( + items=[0x1000, spead2.HEAP_LENGTH_ID], + max_chunks=4, + place=place_plain_llc, + ) + streams = [spead2.recv.ChunkStreamGroupMember( + spead2.ThreadPool(), + config=config, + chunk_stream_config=chunk_stream_config, + group=group + ) for _ in queues] + for stream, queue in zip(streams, queues): + stream.add_inproc_reader(queue) + yield streams + for stream in streams: + stream.stop() + + @pytest.fixture + def send_stream(self, queues): + return send.InprocStream(spead2.ThreadPool(), queues, send.StreamConfig()) + + @pytest.fixture + def item_group(self): + ig = spead2.send.ItemGroup() + ig.add_item(0x1000, 'position', 'position in stream', (), format=[('u', 32)]) + ig.add_item(0x1001, 'payload', 'payload data', (HEAP_PAYLOAD_SIZE,), dtype=np.uint8) + return ig + + def test_full_in_order(self, group, queues, recv_streams, send_stream, data_ring, free_ring, item_group): + chunks = 20 + rng = np.random.default_rng(seed=1) + data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) + data_by_chunk = data.reshape(chunks, -1) + data_by_heap = data.reshape(chunks * HEAPS_PER_CHUNK, -1) + + # Stream groups are impractical to test deterministically, because + # they rely on concurrent forward progress. So we just feed the + # data in slowly enough that we expect heaps provided before a + # sleep to be processed before those after the sleep. + def send_data(): + for i, payload in enumerate(data_by_heap): + if i % STREAMS == 0: + time.sleep(0.005) + item_group['position'].value = i + item_group['payload'].value = payload + heap = item_group.get_heap(data='all', descriptors='none') + send_stream.send_heap(heap, substream_index=i % STREAMS) + # Stop all the queues, which should flush everything and stop the data + # ring. 
+ time.sleep(0.01) + for queue in queues: + queue.stop() + + send_thread = threading.Thread(target=send_data) + send_thread.start() + + for i in range(chunks): + chunk = data_ring.get() + assert chunk.chunk_id == i + np.testing.assert_equal(chunk.present, 1) + np.testing.assert_equal(chunk.data, data_by_chunk[i]) + group.add_free_chunk(chunk) + + # Stopping all the queues should shut down the data ringbuffer + with pytest.raises(spead2.Stopped): + data_ring.get() + + send_thread.join() From d2da04692ca1e62fb55e482d9c2389d479cd92b1 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 13:45:06 +0200 Subject: [PATCH 29/74] Add chunk_window::empty helper This simplifies some code. Also removed chunk_stream_state::flush_head since it wasn't necessary. --- include/spead2/recv_chunk_stream.h | 17 +++++------------ src/recv_chunk_stream_group.cpp | 4 ++-- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 5200451bb..9c3defc1c 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -290,7 +290,7 @@ class chunk_window * We leave it to the while loop below to actually allocate * the chunks. */ - while (head_chunk != tail_chunk) + while (!empty()) flush_head(ready_chunk); head_chunk = tail_chunk = chunk_id - (max_chunks - 1); head_pos = tail_pos = 0; @@ -322,6 +322,7 @@ class chunk_window std::int64_t get_head_chunk() const { return head_chunk; } std::int64_t get_tail_chunk() const { return tail_chunk; } + bool empty() const { return head_chunk == tail_chunk; } }; template class chunk_stream_allocator; @@ -417,8 +418,6 @@ class chunk_stream_state : public chunk_stream_state_base friend chunk_manager_t; chunk_manager_t chunk_manager; - /// Send the oldest chunk to the ready callback - void flush_head(); public: /// Constructor @@ -439,7 +438,7 @@ class chunk_stream_state : public chunk_stream_state_base std::pair allocate( std::size_t size, const packet_header &packet); - /// Send all in-flight chunks to the ready callback + /// Send all in-flight chunks to the ready callback (not thread-safe) void flush_chunks(); }; @@ -671,17 +670,11 @@ stream_config chunk_stream_state::adjust_config(const stream_config &config) return new_config; } -template -void chunk_stream_state::flush_head() -{ - chunks.flush_head([this](chunk *c) { chunk_manager.ready_chunk(*this, c); }); -} - template void chunk_stream_state::flush_chunks() { - while (get_head_chunk() != get_tail_chunk()) - flush_head(); + while (!chunks.empty()) + chunks.flush_head([this](chunk *c) { chunk_manager.ready_chunk(*this, c); }); } template diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 5dde8db27..657dea6ab 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -105,7 +105,7 @@ void chunk_stream_group::stop() stream->stop(); lock.lock(); - while (chunks.get_head_chunk() != chunks.get_tail_chunk()) + while (!chunks.empty()) chunks.flush_head([this](chunk *c) { ready_chunk(c, nullptr); }); } @@ -177,7 +177,7 @@ void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats) * have a chance to make the chunks ready before we shut * everything down. 
*/ - while (chunks.get_head_chunk() != chunks.get_tail_chunk()) + while (!chunks.empty()) { chunk *c = chunks.get_chunk(chunks.get_head_chunk()); if (c && c->ref_count == 0) From c6b888f1f582b9f5a9de36c42a4ab6fb33e87887 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 13:45:46 +0200 Subject: [PATCH 30/74] Fix bug in stream group flushing It could call flush_head on an empty chunk_window, which is illegal. --- src/recv_chunk_stream_group.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 657dea6ab..2b775092b 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -232,7 +232,7 @@ void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id) if (!shared->self) return; // We've stopped, which means everything is flushed chunk_stream_group_member *self = static_cast(shared->self); - while (self->chunks.get_head_chunk() < chunk_id) + while (self->chunks.get_head_chunk() < chunk_id && !self->chunks.empty()) { self->chunks.flush_head([self](chunk *c) { self->group.release_chunk(c, self->batch_stats.data()); From 3a9fcbcd9f285aa15ce16ed77b97d656a2b1d842 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 13:25:17 +0200 Subject: [PATCH 31/74] Add chunk_stream_group test where one stream does nothing --- tests/test_recv_chunk_stream_group.py | 83 ++++++++++++++++++--------- 1 file changed, 57 insertions(+), 26 deletions(-) diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index 1386b0571..b743f4470 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -99,39 +99,40 @@ def recv_streams(self, queues, group): def send_stream(self, queues): return send.InprocStream(spead2.ThreadPool(), queues, send.StreamConfig()) - @pytest.fixture - def item_group(self): + def _send_data(self, send_stream, data, heaps=None): + """Send the data. + + To send only a subset of heaps (or to send out of order), pass the + indices to send in `heaps`. + """ + data_by_heap = data.reshape(-1, HEAP_PAYLOAD_SIZE) ig = spead2.send.ItemGroup() ig.add_item(0x1000, 'position', 'position in stream', (), format=[('u', 32)]) ig.add_item(0x1001, 'payload', 'payload data', (HEAP_PAYLOAD_SIZE,), dtype=np.uint8) - return ig - - def test_full_in_order(self, group, queues, recv_streams, send_stream, data_ring, free_ring, item_group): - chunks = 20 - rng = np.random.default_rng(seed=1) - data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) - data_by_chunk = data.reshape(chunks, -1) - data_by_heap = data.reshape(chunks * HEAPS_PER_CHUNK, -1) - # Stream groups are impractical to test deterministically, because # they rely on concurrent forward progress. So we just feed the # data in slowly enough that we expect heaps provided before a # sleep to be processed before those after the sleep. - def send_data(): - for i, payload in enumerate(data_by_heap): - if i % STREAMS == 0: - time.sleep(0.005) - item_group['position'].value = i - item_group['payload'].value = payload - heap = item_group.get_heap(data='all', descriptors='none') - send_stream.send_heap(heap, substream_index=i % STREAMS) - # Stop all the queues, which should flush everything and stop the data - # ring.
- time.sleep(0.01) - for queue in queues: - queue.stop() - - send_thread = threading.Thread(target=send_data) + if heaps is None: + heaps = range(len(data_by_heap)) + for i in heaps: + ig['position'].value = i + ig['payload'].value = data_by_heap[i] + heap = ig.get_heap(data='all', descriptors='none') + send_stream.send_heap(heap, substream_index=i % STREAMS) + time.sleep(0.001) + # Stop all the queues, which should flush everything and stop the + # data ring. + for queue in send_stream.queues: + queue.stop() + + def test_full_in_order(self, group, queues, recv_streams, send_stream, data_ring, free_ring): + """Send all the data, in order.""" + chunks = 20 + rng = np.random.default_rng(seed=1) + data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) + data_by_chunk = data.reshape(chunks, -1) + send_thread = threading.Thread(target=self._send_data, args=(send_stream, data)) send_thread.start() for i in range(chunks): @@ -146,3 +147,33 @@ def send_data(): data_ring.get() send_thread.join() + + def test_missing_stream(self, group, queues, recv_streams, send_stream, data_ring, free_ring): + """Skip sending data to one of the streams.""" + chunks = 20 + rng = np.random.default_rng(seed=1) + data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) + data_by_heap = data.reshape(chunks, HEAPS_PER_CHUNK, -1) + heaps = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS != 2] + send_thread = threading.Thread(target=self._send_data, args=(send_stream, data, heaps)) + send_thread.start() + + expected_present = np.ones(chunks * HEAPS_PER_CHUNK, bool) + expected_present[2::STREAMS] = False + expected_present = expected_present.reshape(chunks, HEAPS_PER_CHUNK) + + for i in range(chunks): + chunk = data_ring.get() + assert chunk.chunk_id == i + np.testing.assert_equal(chunk.present, expected_present[i]) + actual_data = chunk.data.reshape(HEAPS_PER_CHUNK, -1) + for j in range(HEAPS_PER_CHUNK): + if expected_present[i, j]: + np.testing.assert_equal(actual_data[j], data_by_heap[i, j]) + group.add_free_chunk(chunk) + + # Stopping all the queues should shut down the data ringbuffer + with pytest.raises(spead2.Stopped): + data_ring.get() + + send_thread.join() From 88eca2d9f0a7368b65677dc6a44ca2cc055c411e Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 16:33:34 +0200 Subject: [PATCH 32/74] Add overview documentation for chunk_stream_group --- doc/advanced.rst | 1 + doc/recv-chunk-group.rst | 77 ++++++++++++++++++++++++++++++++++++++++ doc/recv-chunk.rst | 2 ++ 3 files changed, 80 insertions(+) create mode 100644 doc/recv-chunk-group.rst diff --git a/doc/advanced.rst b/doc/advanced.rst index fe6ac3bdf..b048f2b29 100644 --- a/doc/advanced.rst +++ b/doc/advanced.rst @@ -5,4 +5,5 @@ Advanced features :maxdepth: 2 recv-chunk + recv-chunk-group recv-stats diff --git a/doc/recv-chunk-group.rst b/doc/recv-chunk-group.rst new file mode 100644 index 000000000..d64a3fa72 --- /dev/null +++ b/doc/recv-chunk-group.rst @@ -0,0 +1,77 @@ +Chunking stream groups +====================== + +While the :doc:`recv-chunk` allows for high-bandwidth streams to be received +with low overhead, it still has a fundamental scaling limitation: each chunk +can only be constructed from a single thread. :dfn:`Chunk stream groups` allow +this limitation to be overcome, although not without caveats. + +Each stream is still limited to a single thread. However, a :dfn:`group` of +streams can share the same sequence of chunks, with each stream contributing +a subset of the data in each chunk.
Making use of this feature requires +that load balancing is implemented at the network level, using different +destination addresses or ports so that the incoming heaps can be multiplexed +into multiple streams. + +As with a single chunk stream, the group keeps a sliding window of chunks and +obtains new ones from an allocation callback. When the window slides forward, +chunks that fall out the back of the window are provided to a ready callback. +Each member stream also has its own sliding window, which can be smaller (but not +larger) than the group's window. When the group's window slides forward, the +streams' windows are adjusted to ensure they still fit within the group's +window. This can lead to chunks being removed from a stream even though there +is still data for them in the stream. In other words, a stream's window +determines how much reordering is tolerated within a stream, while the group's +window determines how out of sync the streams are allowed to become. When +choosing window sizes, one needs to remember that desynchronisation isn't +confined to the network: it can also happen if the threads servicing the +streams aren't all getting the same amount of CPU time. + +The general flow (in C++) is + +1. Create a :cpp:class:`~spead2::recv::chunk_stream_group_config`. +2. Create a :cpp:class:`~spead2::recv::chunk_stream_group`. +3. Create multiple instances of + :cpp:class:`~spead2::recv::chunk_stream_group_member`, each referencing the + group. +4. Add readers to the streams. +5. Process the data. +6. Optionally, call :cpp:func:`spead2::recv::chunk_stream_group::stop()` + (otherwise it will be called on destruction). +7. Destroy the member streams (this must be done before destroying the group). +8. Destroy the group. + +In Python the process is similar, although garbage collection replaces +explicit destruction. + +Ringbuffer convenience API +-------------------------- +As for standalone chunk streams, there is a simplified API using ringbuffers, +which is also the only API available for Python. A +:cpp:class:`~spead2::recv::chunk_stream_ring_group` is a group that allocates +data from one ringbuffer and sends ready data to another. The description of +:ref:`that API <recv-chunk-ringbuffer>` largely applies here too. The +ringbuffers can be shared between groups. + +Caveats +------- +This is an advanced API that sacrifices some user-friendliness for +performance, and thus some care is needed to use it safely. + +- It is vital that all the streams can make forward progress independently, + as otherwise deadlocks can occur. For example, if they share a thread pool, + the pool must have at least as many threads as streams. It's recommended + that each stream has its own single-threaded thread pool. +- The streams should all be added to the group before adding any readers to + the streams. Things will probably work even if this is not done, but the + design is sufficiently complicated that it is not advisable. +- The stream ID associated with each chunk will be the stream ID of one of the + component streams, but it is undefined which one. +- When the allocate and ready callbacks are invoked, it's not specified which + stream's batch statistics pointer will be passed. For the ready callback, + the `batch_stats` parameter may also be null (currently this can only happen + during :cpp:func:`spead2::recv::chunk_stream_group::stop`). +- Data can be lost, even if the member streams are all lossless, if a stream + falls behind the others. A lossless mode may be added in future.
+- Two streams must not write to the same bytes of a chunk (in the payload, + present array or extra data), as this is undefined behaviour in C++. diff --git a/doc/recv-chunk.rst b/doc/recv-chunk.rst index 654691ebb..6188946a7 100644 --- a/doc/recv-chunk.rst +++ b/doc/recv-chunk.rst @@ -105,6 +105,8 @@ At present it is only possible to write a contiguous piece of data per heap. The data is transferred to the chunk even if the heap is incomplete (and hence not marked in the ``present`` array). +.. _recv-chunk-ringbuffer: + Ringbuffer convenience API -------------------------- A subclass is provided that takes care of the allocation and ready callbacks From 9339d5776d59883689f3a9265c25253676edfce9 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 23 Jun 2023 21:12:24 +0200 Subject: [PATCH 33/74] Add chunk_stream_group_config::eviction_mode --- doc/recv-chunk-group.rst | 22 +++++++++++++--------- include/spead2/recv_chunk_stream_group.h | 24 ++++++++++++++++++++++++ src/py_recv.cpp | 9 ++++++++- src/recv_chunk_stream_group.cpp | 11 +++++++++-- tests/test_recv_chunk_stream_group.py | 8 ++++++++ 5 files changed, 62 insertions(+), 12 deletions(-) diff --git a/doc/recv-chunk-group.rst b/doc/recv-chunk-group.rst index d64a3fa72..bef995560 100644 --- a/doc/recv-chunk-group.rst +++ b/doc/recv-chunk-group.rst @@ -19,13 +19,19 @@ chunks that fall out the back of the window are provided to a ready callback. Each member stream also has its own sliding window, which can be smaller (but not larger) than the group's window. When the group's window slides forward, the streams' windows are adjusted to ensure they still fit within the group's -window. This can lead to chunks being removed from a stream even though there -is still data for them in the stream. In other words, a stream's window -determines how much reordering is tolerated within a stream, while the group's -window determines how out of sync the streams are allowed to become. When -choosing window sizes, one needs to remember that desynchronisation isn't -confined to the network: it can also happen if the threads servicing the -streams aren't all getting the same amount of CPU time. +window. In other words, a stream's window determines how much reordering is +tolerated within a stream, while the group's window determines how out of sync +the streams are allowed to become. + +When desynchronisation does occur, there is a choice of strategies. The default +strategy is eager but potentially lossy: when the group's window moves forward, +the trailing chunk is marked ready as soon as possible, even if this causes +some stream windows to shrink below their normal size. An alternative strategy +is lossless: when the group's window needs to move forward, it is blocked +until all the member streams have caught up. This latter mode is intended for +use with lossless transports such as TCP. However, if one of the component streams +stops functioning (for example, because it is routed on a network path that is +down) it prevents the entire group from making forward progress. The general flow (in C++) is @@ -71,7 +77,5 @@ performance, and thus some care is needed to use it safely. stream's batch statistics pointer will be passed. For the ready callback, the `batch_stats` parameter may also be null (currently this can only happen during :cpp:func:`spead2::recv::chunk_stream_group::stop`). -- Data can be lost, even if the member streams are all lossless, if a stream - falls behind the others. A lossless mode may be added in future. 
- Two streams must not write to the same bytes of a chunk (in the payload, present array or extra data), as this is undefined behaviour in C++. diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 1a25e54be..158fff904 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -42,8 +42,15 @@ class chunk_stream_group_config /// Default value for @ref set_max_chunks static constexpr std::size_t default_max_chunks = chunk_stream_config::default_max_chunks; + enum class eviction_mode + { + LOSSY, + LOSSLESS + }; + private: std::size_t max_chunks = default_max_chunks; + eviction_mode eviction_mode_ = eviction_mode::LOSSY; chunk_allocate_function allocate; chunk_ready_function ready; @@ -59,6 +66,23 @@ /// Return the maximum number of chunks that can be live at the same time. std::size_t get_max_chunks() const { return max_chunks; } + /** + * Set chunk eviction mode. When set to @ref eviction_mode::LOSSLESS, a + * chunk will only be marked ready when all streams have marked it ready + * (due to either stopping or receiving newer data). This is recommended + * when the individual streams have lossless transports (such as TCP). If + * one of the streams stops receiving data (due to a broken network link), + * it will prevent forward progress of the entire group. + * + * Conversely, using @ref eviction_mode::LOSSY (the default) will allow + * progress to continue (with partial data) if one of the streams stops + * receiving data, but one stream falling behind another can lead to data + * being discarded even when the underlying transports are lossless. + */ + chunk_stream_group_config &set_eviction_mode(eviction_mode eviction_mode_); + /// Return the current eviction mode + eviction_mode get_eviction_mode() const { return eviction_mode_; } + /// Set the function used to allocate a chunk. chunk_stream_group_config &set_allocate(chunk_allocate_function allocate); /// Get the function used to allocate a chunk.
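As a usage sketch of the new option (illustrative values; the enum and setters are exactly those declared in the header hunk above):

    #include <spead2/recv_chunk_stream_group.h>

    spead2::recv::chunk_stream_group_config config;
    config.set_max_chunks(64);
    // Members fed by lossless transports (e.g. TCP): prefer to block the
    // window rather than evict chunks that a lagging stream still needs.
    config.set_eviction_mode(
        spead2::recv::chunk_stream_group_config::eviction_mode::LOSSLESS);

The Python binding added below exposes the same choice, e.g. recv.ChunkStreamGroupConfig(max_chunks=64, eviction_mode=recv.ChunkStreamGroupConfig.EvictionMode.LOSSLESS), as exercised by the tests in this patch.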
diff --git a/src/py_recv.cpp b/src/py_recv.cpp index 0d013c6a9..28e35777e 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -983,12 +983,19 @@ py::module register_module(py::module &parent) } }); - py::class_(m, "ChunkStreamGroupConfig") + py::class_ chunk_stream_group_config_cls(m, "ChunkStreamGroupConfig"); + chunk_stream_group_config_cls .def(py::init(&data_class_constructor)) .def_property("max_chunks", SPEAD2_PTMF(chunk_stream_group_config, get_max_chunks), SPEAD2_PTMF(chunk_stream_group_config, set_max_chunks)) + .def_property("eviction_mode", + SPEAD2_PTMF(chunk_stream_group_config, get_eviction_mode), + SPEAD2_PTMF(chunk_stream_group_config, set_eviction_mode)) .def_readonly_static("DEFAULT_MAX_CHUNKS", &chunk_stream_group_config::default_max_chunks); + py::enum_(chunk_stream_group_config_cls, "EvictionMode") + .value("LOSSY", chunk_stream_group_config::eviction_mode::LOSSY) + .value("LOSSLESS", chunk_stream_group_config::eviction_mode::LOSSLESS); py::class_>(m, "ChunkStreamRingGroup") diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 2b775092b..5e5d99ea9 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -39,6 +39,12 @@ chunk_stream_group_config &chunk_stream_group_config::set_max_chunks(std::size_t return *this; } +chunk_stream_group_config &chunk_stream_group_config::set_eviction_mode(eviction_mode eviction_mode_) +{ + this->eviction_mode_ = eviction_mode_; + return *this; +} + chunk_stream_group_config &chunk_stream_group_config::set_allocate(chunk_allocate_function allocate) { this->allocate = std::move(allocate); @@ -125,8 +131,9 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea if (chunk_id >= chunks.get_head_chunk() + std::int64_t(max_chunks)) { std::int64_t target = chunk_id - max_chunks + 1; // first chunk we don't need to flush - for (chunk_stream_group_member *s : streams) - s->async_flush_until(target); + if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY) + for (chunk_stream_group_member *s : streams) + s->async_flush_until(target); std::int64_t to_check = chunks.get_head_chunk(); // next chunk to wait for while (true) { diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index b743f4470..9937e32d5 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -34,6 +34,7 @@ class TestChunkStreamGroupConfig: def test_default_construct(self): config = recv.ChunkStreamGroupConfig() assert config.max_chunks == config.DEFAULT_MAX_CHUNKS + assert config.eviction_mode == recv.ChunkStreamGroupConfig.EvictionMode.LOSSY def test_zero_max_chunks(self): with pytest.raises(ValueError): @@ -45,6 +46,13 @@ def test_max_chunks(self): config.max_chunks = 4 assert config.max_chunks == 4 + def test_eviction_mode(self): + EvictionMode = recv.ChunkStreamGroupConfig.EvictionMode + config = recv.ChunkStreamGroupConfig(eviction_mode=EvictionMode.LOSSLESS) + assert config.eviction_mode == EvictionMode.LOSSLESS + config.eviction_mode = EvictionMode.LOSSY + assert config.eviction_mode == EvictionMode.LOSSY + class TestChunkStreamRingGroup: @pytest.fixture From 5fb5be814d6512d8fdefb480b2ca1c1a67614e18 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 26 Jun 2023 12:04:03 +0200 Subject: [PATCH 34/74] Make chunk_stream_group own the streams Instead of the user creating streams which add themselves to the group (and remove themselves on stop), have the group keep a fixed set of 
streams. This has the advantage that the user is not responsible for keeping references to the streams to keep them alive / destroy them when appropriate. --- include/spead2/recv_chunk_stream_group.h | 92 +++++++++++++++++++----- src/py_recv.cpp | 44 +++++++++--- src/recv_chunk_stream_group.cpp | 76 ++++++++++++-------- tests/test_recv_chunk_stream_group.py | 30 ++++---- 4 files changed, 166 insertions(+), 76 deletions(-) diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 158fff904..fda9c9b44 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -119,6 +120,8 @@ class chunk_stream_group_member; /** * A holder for a collection of streams that share chunks. * + * The public interface must only be called from one thread at a time. + * * @todo write more documentation here */ class chunk_stream_group @@ -144,13 +147,14 @@ class chunk_stream_group detail::chunk_window chunks; /** - * References to the component streams that have not yet been stopped. + * The component streams. * - * Note that these are insufficient to actually keep the streams alive. - * The stream_stop_received callback ensures that we don't end up with - * dangling pointers. + * This is protected by the mutex, except that read-only access is always + * permitted in methods called by the user. This is safe because writes + * only happen in methods called by the user (@ref emplace_back), and the + * user is required to serialise their calls. */ - std::set streams; + std::vector> streams; /** * Obtain the chunk with a given ID. @@ -176,15 +180,30 @@ class chunk_stream_group /// Pass a chunk to the user-provided ready function void ready_chunk(chunk *c, std::uint64_t *batch_stats); + // Helper classes for implementing iterators + template + class dereference + { + public: + decltype(*std::declval()) operator()(const T &ptr) const { return *ptr; } + }; + + template + class dereference_const + { + public: + const decltype(*std::declval()) operator()(const T &ptr) const { return *ptr; } + }; + protected: - /// Called by newly-constructed streams - virtual void stream_added(chunk_stream_group_member &s); + /// Called by @ref emplace_back for newly-constructed streams + virtual void stream_added(chunk_stream_group_member &s) {} /** * Called when a stream stops (whether from the network or the user). * * The stream's @c queue_mutex is locked when this is called. */ - virtual void stream_stop_received(chunk_stream_group_member &s); + virtual void stream_stop_received(chunk_stream_group_member &s) {} /** * Called when the user stops (or destroys) a stream. * @@ -194,9 +213,40 @@ class chunk_stream_group virtual void stream_pre_stop(chunk_stream_group_member &s) {} public: + using iterator = boost::transform_iterator< + dereference>, + std::vector>::iterator + >; + using const_iterator = boost::transform_iterator< + dereference_const>, + std::vector>::const_iterator + >; + explicit chunk_stream_group(const chunk_stream_group_config &config); virtual ~chunk_stream_group(); + // Add a new stream + chunk_stream_group_member &emplace_back( + io_service_ref io_service, + const stream_config &config, + const chunk_stream_config &chunk_config); + + // Add a new stream, possibly of a subclass + template + T &emplace_back(Args&&... 
args); + + // Provide vector-like access to the streams + std::size_t size() const { return streams.size(); } + bool empty() const { return streams.empty(); } + chunk_stream_group_member &operator[](std::size_t index) { return *streams[index]; } + const chunk_stream_group_member &operator[](std::size_t index) const { return *streams[index]; } + iterator begin() noexcept; + iterator end() noexcept; + const_iterator begin() const noexcept; + const_iterator end() const noexcept; + const_iterator cbegin() const noexcept; + const_iterator cend() const noexcept; + /** * Stop all streams and release all chunks. This function must not be * called concurrently with creating or destroying streams, and no @@ -205,6 +255,17 @@ class chunk_stream_group virtual void stop(); }; +template +T &chunk_stream_group::emplace_back(Args&&... args) +{ + std::lock_guard lock(mutex); + std::unique_ptr stream(new T(*this, std::forward(args)...)); + chunk_stream_group_member &ret = *stream; + streams.push_back(std::move(stream)); + stream_added(ret); + return ret; +} + /** * A single stream within a group managed by @ref chunk_stream_group. */ class chunk_stream_group_member : private detail::chunk_stream_state(m, "ChunkStreamGroupMember"); + py::class_>(m, "ChunkStreamRingGroup") .def(py::init(), py::keep_alive<1, 4>()) + .def( + "emplace_back", + [](chunk_stream_ring_group_wrapper &group, + std::shared_ptr thread_pool, + const stream_config &config, + const chunk_stream_config &chunk_stream_config) -> chunk_stream_group_member & { + return group.emplace_back(std::move(thread_pool), config, chunk_stream_config); + }, + "thread_pool"_a, "config"_a, "chunk_stream_config"_a, + py::return_value_policy::reference_internal + ) + .def("__len__", SPEAD2_PTMF(chunk_stream_ring_group_wrapper, size)) + .def( + "__getitem__", + [](chunk_stream_ring_group_wrapper &group, std::size_t index) -> chunk_stream_group_member & { + if (index < group.size()) + return group[index]; + else + throw py::index_error(); + }, + py::return_value_policy::reference_internal + ) + .def( + "__iter__", + [](chunk_stream_ring_group_wrapper &group) { + return py::make_iterator(group.begin(), group.end()); + }, + py::keep_alive<0, 1>() // keep the group alive while it is iterated + ) .def("stop", SPEAD2_PTMF(chunk_stream_ring_group_wrapper, stop)); - py::class_(m, "ChunkStreamGroupMember") - .def(py::init, - const stream_config &, - const chunk_stream_config &, - chunk_stream_ring_group_wrapper &>(), - "thread_pool"_a.none(false), - "config"_a = stream_config(), - "chunk_stream_config"_a, - "group"_a, - py::keep_alive<1, 5>()); // Keep the group alive return m; } diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 5e5d99ea9..3e8dd978e 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -94,23 +94,53 @@ chunk_stream_group::~chunk_stream_group() stop(); } +chunk_stream_group::iterator chunk_stream_group::begin() noexcept +{ + return iterator(streams.begin()); +} + +chunk_stream_group::iterator chunk_stream_group::end() noexcept +{ + return iterator(streams.end()); +} + +chunk_stream_group::const_iterator chunk_stream_group::begin() const noexcept +{ + return const_iterator(streams.begin()); +} + +chunk_stream_group::const_iterator chunk_stream_group::end() const noexcept +{ + return const_iterator(streams.end()); +} + +chunk_stream_group::const_iterator chunk_stream_group::cbegin() const noexcept +{ + return const_iterator(streams.begin()); }
+chunk_stream_group::const_iterator chunk_stream_group::cend() const noexcept +{ + return const_iterator(streams.end()); +} + +chunk_stream_group_member &chunk_stream_group::emplace_back( + io_service_ref io_service, + const stream_config &config, + const chunk_stream_config &chunk_config) +{ + return emplace_back(std::move(io_service), config, chunk_config); +} + void chunk_stream_group::stop() { - /* Streams will try to lock the group (and modify `streams`) while - * stopping, so we move the streams set into a local variable. - * - * The mutex is not held while stopping streams, so streams can - * asynchronously stop under us. That's okay because the contract for this - * function is that it's not allowed to occur concurrently with destroying - * streams. + /* The mutex is not held while stopping streams, so that callbacks + * triggered by stopping the streams can take the lock if necessary. */ - std::unique_lock lock(mutex); - auto streams_local = std::move(streams); - lock.unlock(); - for (auto stream : streams_local) + for (const auto &stream : streams) stream->stop(); - lock.lock(); + std::lock_guard lock(mutex); while (!chunks.empty()) chunks.flush_head([this](chunk *c) { ready_chunk(c, nullptr); }); } @@ -132,7 +162,7 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea { std::int64_t target = chunk_id - max_chunks + 1; // first chunk we don't need to flush if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY) - for (chunk_stream_group_member *s : streams) + for (const auto &s : streams) s->async_flush_until(target); std::int64_t to_check = chunks.get_head_chunk(); // next chunk to wait for while (true) @@ -196,33 +226,17 @@ void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats) } } -void chunk_stream_group::stream_added(chunk_stream_group_member &s) -{ - std::lock_guard lock(mutex); - bool added = streams.insert(&s).second; - assert(added); // should be impossible to add the same stream twice - (void) added; // suppress warning when NDEBUG is defined -} - -void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s) -{ - std::lock_guard lock(mutex); - streams.erase(&s); -} - - chunk_stream_group_member::chunk_stream_group_member( + chunk_stream_group &group, io_service_ref io_service, const stream_config &config, - const chunk_stream_config &chunk_config, - chunk_stream_group &group) + const chunk_stream_config &chunk_config) : chunk_stream_state(config, chunk_config, detail::chunk_manager_group(group)), stream(std::move(io_service), adjust_config(config)), group(group) { if (chunk_config.get_max_chunks() > group.config.get_max_chunks()) throw std::invalid_argument("stream max_chunks must not be larger than group max_chunks"); - group.stream_added(*this); } void chunk_stream_group_member::heap_ready(live_heap &&lh) diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index 9937e32d5..25e431ddb 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -76,14 +76,9 @@ def queues(self): return [spead2.InprocQueue() for _ in range(STREAMS)] @pytest.fixture - def group(self, data_ring, free_ring): + def group(self, data_ring, free_ring, queues): group_config = recv.ChunkStreamGroupConfig(max_chunks=4) group = recv.ChunkStreamRingGroup(group_config, data_ring, free_ring) - yield group - group.stop() - - @pytest.fixture - def recv_streams(self, queues, group): # max_heaps is artificially high to make 
test_packet_too_old work config = spead2.recv.StreamConfig(max_heaps=128) chunk_stream_config = spead2.recv.ChunkStreamConfig( @@ -91,17 +86,16 @@ def recv_streams(self, queues, group): max_chunks=4, place=place_plain_llc, ) - streams = [spead2.recv.ChunkStreamGroupMember( - spead2.ThreadPool(), - config=config, - chunk_stream_config=chunk_stream_config, - group=group - ) for _ in queues] - for stream, queue in zip(streams, queues): + for queue in queues: + group.emplace_back( + spead2.ThreadPool(), + config=config, + chunk_stream_config=chunk_stream_config + ) + for stream, queue in zip(group, queues): stream.add_inproc_reader(queue) - yield streams - for stream in streams: - stream.stop() + yield group + group.stop() @pytest.fixture def send_stream(self, queues): @@ -134,7 +128,7 @@ def _send_data(self, send_stream, data, heaps=None): for queue in send_stream.queues: queue.stop() - def test_full_in_order(self, group, queues, recv_streams, send_stream, data_ring, free_ring): + def test_full_in_order(self, group, queues, send_stream, data_ring, free_ring): """Send all the data, in order.""" chunks = 20 rng = np.random.default_rng(seed=1) @@ -156,7 +150,7 @@ def test_full_in_order(self, group, queues, recv_streams, send_stream, data_ring send_thread.join() - def test_missing_stream(self, group, queues, recv_streams, send_stream, data_ring, free_ring): + def test_missing_stream(self, group, queues, send_stream, data_ring, free_ring): """Skip sending data to one of the streams.""" chunks = 20 rng = np.random.default_rng(seed=1) From 714f5c3e09dd00f5b59d1fbfd5e1be6d1d551a89 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 26 Jun 2023 14:54:53 +0200 Subject: [PATCH 35/74] Add ChunkRingPair to public interface --- src/spead2/recv/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spead2/recv/__init__.py b/src/spead2/recv/__init__.py index fdd2ef86f..78ce41e99 100644 --- a/src/spead2/recv/__init__.py +++ b/src/spead2/recv/__init__.py @@ -56,6 +56,7 @@ from spead2._spead2.recv import ( # noqa: F401 Chunk, + ChunkRingPair, ChunkRingStream, ChunkRingbuffer, ChunkStreamConfig, From dda788c95fe06f7779a8a35ace7c1b824bc9c9c3 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 26 Jun 2023 15:11:56 +0200 Subject: [PATCH 36/74] Fix Python wrapper for chunk_ring_group It was using the wrong class to bind the getters for data_ringbuffer and free_ringbuffer. --- src/py_recv.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/py_recv.cpp b/src/py_recv.cpp index f4ba9c232..b0506c55e 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -895,10 +895,11 @@ py::module register_module(py::module &parent) [](chunk &c, memory_allocator::pointer &&value) { c.extra = std::move(value); }); // Don't allow ChunkRingPair to be constructed from Python. It exists // purely to be a base class. 
- py::class_>(m, "ChunkRingPair") + using chunk_ring_pair = detail::chunk_ring_pair; + py::class_(m, "ChunkRingPair") .def( "add_free_chunk", - [](detail::chunk_ring_pair &self, chunk &c) + [](chunk_ring_pair &self, chunk &c) { push_chunk( [&self](std::unique_ptr &&wrapper) @@ -909,8 +910,8 @@ py::module register_module(py::module &parent) ); }, "chunk"_a) - .def_property_readonly("data_ringbuffer", SPEAD2_PTMF(chunk_ring_stream_wrapper, get_data_ringbuffer)) - .def_property_readonly("free_ringbuffer", SPEAD2_PTMF(chunk_ring_stream_wrapper, get_free_ringbuffer)); + .def_property_readonly("data_ringbuffer", SPEAD2_PTMF(chunk_ring_pair, get_data_ringbuffer)) + .def_property_readonly("free_ringbuffer", SPEAD2_PTMF(chunk_ring_pair, get_free_ringbuffer)); py::class_, From a62fef288c0fb03d2bad21bd0eaa01a4a14489b7 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 27 Jun 2023 12:05:01 +0200 Subject: [PATCH 37/74] Fix a bug with unwanted sharing of stats between stream_config Copying a stream_config would leave them pointing at the same vector, with correctness consequences if either copy later adds more statistics. This happens as part of chunk_stream::adjust_config. This could also lead to race conditions if a config was modified after constructing a stream with it, since it would affect and stream_stats instances that it shared with. The type has been changed to shared_ptr> throughout to prevent modifying shared data. This makes add_stat much less efficient (it copies the vector every time) but that is not on the critical path. --- doc/changelog.rst | 8 ++++++++ include/spead2/recv_stream.h | 17 ++++++++++++----- src/recv_stream.cpp | 19 +++++++++---------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index ddb9dafeb..f08fdfc3b 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -5,6 +5,14 @@ Changelog - Update :meth:`!test_async_flush` and :meth:`!test_async_flush_fail` to keep handles to async tasks, to prevent them being garbage collected too early. +- Fix a bug where copying a :cpp:class:`spead2::recv::stream_config` would not + deep copy the names of custom statistics, and so any statistics added to the + copy would also affect the original, and there were also potential race + conditions if a stream config was modified while holding stream statistics. +- Fix a bug (caused by the bug above) where passing a + :cpp:class:`spead2::recv::stream_config` to construct a + :cpp:class:`spead2::recv::chunk_stream` would modify the config. Passing + the same config to construct two chunk streams would fail with an error. .. 
rubric:: 3.11.1 diff --git a/include/spead2/recv_stream.h b/include/spead2/recv_stream.h index c72e0e0e1..3a1ca7bda 100644 --- a/include/spead2/recv_stream.h +++ b/include/spead2/recv_stream.h @@ -184,7 +184,7 @@ class stream_stats_iterator : public boost::iterator_facade< class stream_stats { private: - std::shared_ptr> config; + std::shared_ptr> config; std::vector values; public: @@ -206,9 +206,9 @@ class stream_stats /// Construct with the default set of statistics, and all zero values stream_stats(); /// Construct with all zero values - explicit stream_stats(std::shared_ptr> config); + explicit stream_stats(std::shared_ptr> config); /// Construct with provided values - stream_stats(std::shared_ptr> config, + stream_stats(std::shared_ptr> config, std::vector values); /* Copy constructor and copy assignment need to be implemented manually @@ -357,8 +357,15 @@ class stream_config bool allow_out_of_order = false; /// A user-defined identifier for a stream std::uintptr_t stream_id = 0; - /// Statistics (includes the built-in ones) - std::shared_ptr> stats; + /** Statistics (includes the built-in ones) + * + * This is a shared_ptr so that instances of @ref stream_stats can share + * it. Every modification creates a new vector (copy-on-write). This is + * potentially very inefficient, since it creates a copy even when there + * are no sharers, but there are not expected to be huge numbers of + * statistics. + */ + std::shared_ptr> stats; public: stream_config(); diff --git a/src/recv_stream.cpp b/src/recv_stream.cpp index 98d16e80d..fa3fcd847 100644 --- a/src/recv_stream.cpp +++ b/src/recv_stream.cpp @@ -98,7 +98,7 @@ static std::size_t get_stat_index( } -static std::shared_ptr> make_default_stats() +static std::shared_ptr> make_default_stats() { auto stats = std::make_shared>(); // Keep this in sync with the stream_stat_* constexprs in the header @@ -121,21 +121,21 @@ static std::shared_ptr> make_default_stats() * Sharing this means the compatibility check for operator+ requires only a * pointer comparison rather than comparing arrays. */ -static std::shared_ptr> default_stats = make_default_stats(); +static std::shared_ptr> default_stats = make_default_stats(); stream_stats::stream_stats() : stream_stats(default_stats) { } -stream_stats::stream_stats(std::shared_ptr> config) +stream_stats::stream_stats(std::shared_ptr> config) : stream_stats(config, std::vector(config->size())) { // Note: annoyingly, can't use std::move(config) above, because we access // config to get the size to use for the vector. } -stream_stats::stream_stats(std::shared_ptr> config, +stream_stats::stream_stats(std::shared_ptr> config, std::vector values) : config(std::move(config)), values(std::move(values)), @@ -358,12 +358,11 @@ std::size_t stream_config::add_stat(std::string name, stream_stat_config::mode m { if (spead2::recv::get_stat_index_nothrow(*stats, name) != stats->size()) throw std::invalid_argument("A statistic called " + name + " already exists"); - // If we're pointing at the default, make a copy so that we don't modify - // the default. 
- if (stats == default_stats) - stats = std::make_shared>(*default_stats); - std::size_t index = stats->size(); - stats->emplace_back(std::move(name), mode); + // Make a copy so that we don't modify any shared copies + auto new_stats = std::make_shared>(*stats); + std::size_t index = new_stats->size(); + new_stats->emplace_back(std::move(name), mode); + stats = std::move(new_stats); return index; } From c67734cfb3cfb90e974a9cc83ef5cfb2aae0e260 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 27 Jun 2023 15:34:40 +0200 Subject: [PATCH 38/74] Update recv/__init__.pyi for chunk stream groups --- src/spead2/recv/__init__.pyi | 39 +++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/spead2/recv/__init__.pyi b/src/spead2/recv/__init__.pyi index 9f30fccc2..37d0cd5e0 100644 --- a/src/spead2/recv/__init__.pyi +++ b/src/spead2/recv/__init__.pyi @@ -1,4 +1,4 @@ -# Copyright 2019-2022 National Research Foundation (SARAO) +# Copyright 2019-2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -255,14 +255,43 @@ class ChunkRingbuffer(_ChunkRingbuffer): def put(self, chunk: Chunk) -> None: ... def __iter__(self) -> Iterator[Chunk]: ... -class ChunkRingStream(_Stream): +class ChunkRingPair: + def add_free_chunk(self, chunk: Chunk) -> None: ... + @property + def data_ringbuffer(self) -> _ChunkRingbuffer: ... + @property + def free_ringbuffer(self) -> _ChunkRingbuffer: ... + +class ChunkRingStream(_Stream, ChunkRingPair): def __init__( self, thread_pool: spead2.ThreadPool, config: StreamConfig, chunk_config: ChunkStreamConfig, data_ringbuffer: _ChunkRingbuffer, free_ringbuffer: _ChunkRingbuffer) -> None: ... - def add_free_chunk(self, chunk: Chunk) -> None: ... +class ChunkStreamGroupConfig: + class EvictionMode(enum.Enum): + LOSSY = ... + LOSSLESS = ... + + DEFAULT_MAX_CHUNKS: ClassVar[int] @property - def data_ringbuffer(self) -> _ChunkRingbuffer: ... + def max_chunks(self) -> int: ... @property - def free_ringbuffer(self) -> _ChunkRingbuffer: ... + def eviction_mode(self) -> ChunkStreamGroupConfig.EvictionMode: ... + + def __init__(self, *, max_chunks=..., eviction_mode=...) -> None: ... + +class ChunkStreamRingGroup(ChunkRingPair): + def __init__( + self, group_config: ChunkStreamGroupConfig, data_ringbuffer: _ChunkRingbuffer, + free_ringbuffer: _ChunkRingbuffer) -> None: ... + def emplace_back( + self, thread_pool: spead2.ThreadPool, config: spead2.StreamConfig, + chunk_stream_config: spead2.ChunkStreamConfig) -> None: ... + def __len__(self) -> int: ... + def __getitem__(self, index: int) -> spead2.ChunkStreamGroupMember: ... + def __iter__(self) -> Iterator[spead2.ChunkStreamGroupMember]: ... + def stop(self) -> None: ... + +class ChunkStreamGroupMember(_Stream): + pass From 1dd4c97dac1c88714d3f89ad6b6f8c41eebef446 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 27 Jun 2023 15:41:57 +0200 Subject: [PATCH 39/74] Fix type annotation for ChunkRingStream constructor --- doc/changelog.rst | 2 ++ src/spead2/recv/__init__.pyi | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index f08fdfc3b..a3155f893 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -13,6 +13,8 @@ Changelog :cpp:class:`spead2::recv::stream_config` to construct a :cpp:class:`spead2::recv::chunk_stream` would modify the config. 
   Passing the same config to construct two chunk streams would fail with
   an error.
+- Fix the type annotation for the :py:class:`~.ChunkRingStream` constructor:
+  the parameter name for `chunk_stream_config` was incorrect.
 
 .. rubric:: 3.11.1

diff --git a/src/spead2/recv/__init__.pyi b/src/spead2/recv/__init__.pyi
index 37d0cd5e0..a89658b1f 100644
--- a/src/spead2/recv/__init__.pyi
+++ b/src/spead2/recv/__init__.pyi
@@ -265,7 +265,7 @@ class ChunkRingPair:
 class ChunkRingStream(_Stream, ChunkRingPair):
     def __init__(
         self, thread_pool: spead2.ThreadPool, config: StreamConfig,
-        chunk_config: ChunkStreamConfig,
+        chunk_stream_config: ChunkStreamConfig,
         data_ringbuffer: _ChunkRingbuffer,
         free_ringbuffer: _ChunkRingbuffer) -> None: ...

From caebb330d9989e89547765a0c5d754dd7a8f93eb Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Tue, 27 Jun 2023 16:34:31 +0200
Subject: [PATCH 40/74] Make stream_base::shared_state private

It's now only exposed indirectly via reader::handler_context and the
new stream_base::get_queue_mutex and stream_base::post.

---
 include/spead2/recv_chunk_stream.h |  2 +-
 include/spead2/recv_stream.h       | 59 +++++++++++++++++++++++++++++-
 src/recv_chunk_stream.cpp          |  2 +-
 src/recv_chunk_stream_group.cpp    | 17 +++------
 4 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h
index 9c3defc1c..57262df9a 100644
--- a/include/spead2/recv_chunk_stream.h
+++ b/include/spead2/recv_chunk_stream.h
@@ -890,7 +890,7 @@ void chunk_ring_stream<DataRingbuffer, FreeRingbuffer>::stop()
     {
         // Locking is probably not needed, as all readers are terminated by
         // chunk_stream::stop(). But it should be safe.
-        std::lock_guard<std::mutex> lock(shared->queue_mutex);
+        std::lock_guard<std::mutex> lock(get_queue_mutex());
         this->graveyard.reset(); // free chunks that didn't make it into data_ring
     }
 }
diff --git a/include/spead2/recv_stream.h b/include/spead2/recv_stream.h
index 3a1ca7bda..aa592e5bc 100644
--- a/include/spead2/recv_stream.h
+++ b/include/spead2/recv_stream.h
@@ -570,7 +570,7 @@ class stream_base
     /// Stream configuration
     const stream_config config;
 
-protected:
+private:
     struct shared_state
     {
         /**
@@ -667,6 +667,50 @@ class stream_base
      */
     virtual void stop_received();
 
+    std::mutex &get_queue_mutex() const { return shared->queue_mutex; }
+
+    /**
+     * Schedule a function to be called on an executor, with the lock held.
+     * This is a fire-and-forget operation. If the stream is stopped before the
+     * callback fires, the callback is silently ignored.
+     */
+    template<typename ExecutionContext, typename F>
+    void post(ExecutionContext &ex, F &&func)
+    {
+        class wrapper
+        {
+        private:
+            std::shared_ptr<shared_state> shared;
+            typename std::decay<F>::type func;
+
+        public:
+            wrapper(std::shared_ptr<shared_state> shared, F&& func)
+                : shared(std::move(shared)), func(std::forward<F>(func))
+            {
+            }
+
+            /* Prevent copying, while allowing moving (copying is safe but inefficient)
+             * Move assignment is not implemented because it fails to compile if
+             * F is not move-assignable. This can probably be solved with
+             * std::enable_if, but it doesn't seem worth the effort.
+             */
+            wrapper(const wrapper &) = delete;
+            wrapper &operator=(const wrapper &) = delete;
+            wrapper(wrapper &&) = default;
+
+            void operator()() const
+            {
+                std::lock_guard<std::mutex> lock(shared->queue_mutex);
+                stream_base *self = shared->self;
+                if (self)
+                    func(*self);
+            }
+        };
+
+        // TODO: can do this with a lambda (with perfect forwarding) in C++14
+        boost::asio::post(ex, wrapper(shared, std::forward<F>(func)));
+    }
+
 public:
     /**
      * State for a batch of calls to @ref add_packet. Constructing this object
@@ -940,6 +984,19 @@ class stream : protected stream_base
     /// Actual implementation of @ref stop
     void stop_impl();
 
+    using stream_base::post;  // Make base class version visible, despite being overloaded
+
+    /**
+     * Schedule a function to be called on the stream's io_service, with the
+     * lock held. This is a fire-and-forget operation. If the stream is stopped
+     * before the callback fires, the callback is silently dropped.
+     */
+    template<typename F>
+    void post(F &&func)
+    {
+        post(get_io_service(), std::forward<F>(func));
+    }
+
 public:
     using stream_base::get_config;
     using stream_base::get_stats;
diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp
index c79586659..ba7911a4e 100644
--- a/src/recv_chunk_stream.cpp
+++ b/src/recv_chunk_stream.cpp
@@ -270,7 +270,7 @@ void chunk_stream::stop_received()
 void chunk_stream::stop()
 {
     {
-        std::lock_guard<std::mutex> lock(shared->queue_mutex);
+        std::lock_guard<std::mutex> lock(get_queue_mutex());
         flush_chunks();
     }
     stream::stop();
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index 3e8dd978e..0d3656486 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -246,17 +246,12 @@ void chunk_stream_group_member::heap_ready(live_heap &&lh)
 
 void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id)
 {
-    std::shared_ptr<shared_state> shared = this->shared;
-    // TODO: once we depend on C++14, move rather than copying into the lambda
-    boost::asio::post(get_io_service(), [shared, chunk_id]() {
-        std::lock_guard<std::mutex> lock(shared->queue_mutex);
-        if (!shared->self)
-            return;  // We've stopped, which means everything is flushed
-        chunk_stream_group_member *self = static_cast<chunk_stream_group_member *>(shared->self);
-        while (self->chunks.get_head_chunk() < chunk_id && !self->chunks.empty())
+    post([chunk_id](stream_base &s) {
+        chunk_stream_group_member &self = static_cast<chunk_stream_group_member &>(s);
+        while (self.chunks.get_head_chunk() < chunk_id && !self.chunks.empty())
         {
-            self->chunks.flush_head([self](chunk *c) {
-                self->group.release_chunk(c, self->batch_stats.data());
+            self.chunks.flush_head([&self](chunk *c) {
+                self.group.release_chunk(c, self.batch_stats.data());
             });
         }
     });
@@ -273,7 +268,7 @@ void chunk_stream_group_member::stop()
 {
     group.stream_pre_stop(*this);
     {
-        std::lock_guard<std::mutex> lock(shared->queue_mutex);
+        std::lock_guard<std::mutex> lock(get_queue_mutex());
         flush_chunks();
     }
     stream::stop();

From 3c085f7bcb60f6016b36ffe8161816cc008c3128 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Wed, 28 Jun 2023 09:56:46 +0200
Subject: [PATCH 41/74] Fix some errors from doxygen

---
 include/spead2/recv_chunk_stream.h | 14 +++++++-------
 include/spead2/recv_stream.h       |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h
index 57262df9a..a7ae60748 100644
--- a/include/spead2/recv_chunk_stream.h
+++ b/include/spead2/recv_chunk_stream.h
@@ -327,7 +327,7 @@ class chunk_window
 template<typename CM>
 class chunk_stream_allocator;
 
-/// Parts of chunk_stream_state that don't depend on the chunk manager
+/// Parts of @ref chunk_stream_state that don't depend on the chunk manager
 class chunk_stream_state_base
 {
 protected:
@@ -350,9 +350,10 @@ class chunk_stream_state_base
     chunk_window chunks;
 
     /**
-     * Scratch area for use by @ref allocate. This contains not just the @ref
-     * chunk_place_data, but also the various arrays it points to. They're
-     * allocated contiguously to minimise the number of cache lines accessed.
+     * Scratch area for use by @ref chunk_place_function. This contains not
+     * just the @ref chunk_place_data, but also the various arrays it points
+     * to. They're allocated contiguously to minimise the number of cache lines
+     * accessed.
      */
     std::unique_ptr<std::uint8_t[]> place_data_storage;
     chunk_place_data *place_data;
@@ -392,9 +393,8 @@ class chunk_stream_state_base
     const chunk_stream_config &get_chunk_config() const { return chunk_config; }
 
     /**
-     * Get the @ref heap_metadata associated with a heap payload pointer.
-     * If the pointer was not allocated by a chunk stream, returns @c
-     * nullptr.
+     * Get the metadata associated with a heap payload pointer. If the pointer
+     * was not allocated by a chunk stream, returns @c nullptr.
      */
     static const heap_metadata *get_heap_metadata(const memory_allocator::pointer &ptr);
 };
diff --git a/include/spead2/recv_stream.h b/include/spead2/recv_stream.h
index aa592e5bc..f014a7def 100644
--- a/include/spead2/recv_stream.h
+++ b/include/spead2/recv_stream.h
@@ -625,7 +625,7 @@ class stream_base
     /**
      * Callback called when a heap is being ejected from the live list.
      * The heap might or might not be complete. The
-     * @ref shared_state::queue_mutex will be
+     * @ref spead2::recv::stream_base::shared_state::queue_mutex will be
      * locked during this call, which will block @ref stop and @ref flush.
      */
     virtual void heap_ready(live_heap &&) {}
@@ -661,7 +661,7 @@ class stream_base
      * It is undefined what happens if @ref add_packet is called after a stream
      * is stopped.
      *
-     * This is called with @ref shared_state::queue_mutex
+     * This is called with @ref spead2::recv::stream_base::shared_state::queue_mutex
      * locked. Users must not call this function themselves; instead, call @ref
      * stop.
      */
@@ -878,7 +878,7 @@ class reader
      * with extra arguments prefixed, so it should have the signature
     * void handler(handler_context ctx, stream_base::add_packet_state &state, ...);
      *
-     * The @ref handler_context can be passed (by rvalue
+     * The @ref reader::handler_context can be passed (by rvalue
      * reference) to a single call to @ref bind_handler, which is cheaper
      * than the overload that doesn't take it (it avoids manipulating reference
      * counts on a @c std::shared_ptr).
@@ -895,7 +895,7 @@ class reader
     }
 
     /**
-     * Overload that takes an existing @ref handler_context.
+     * Overload that takes an existing @ref reader::handler_context.
      */
     template<typename T>
     bound_handler<typename std::decay<T>::type> bind_handler(handler_context ctx, T &&handler) const

From bdd4c3b6c4cb3f6b99e1f3b040d530d31294a28b Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Wed, 28 Jun 2023 10:55:57 +0200
Subject: [PATCH 42/74] Add reference C++ documentation for chunk stream groups

---
 doc/cpp-recv-chunk-group.rst             | 18 ++++++++++++
 doc/cpp.rst                              |  1 +
 include/spead2/recv_chunk_stream_group.h | 36 ++++++++++--------------
 3 files changed, 34 insertions(+), 21 deletions(-)
 create mode 100644 doc/cpp-recv-chunk-group.rst

diff --git a/doc/cpp-recv-chunk-group.rst b/doc/cpp-recv-chunk-group.rst
new file mode 100644
index 000000000..6b7fa04c4
--- /dev/null
+++ b/doc/cpp-recv-chunk-group.rst
@@ -0,0 +1,18 @@
+Chunking stream groups
+======================
+
+For an overview, refer to :doc:`recv-chunk-group`. This page is a reference for the
+C++ API.
+
+.. doxygenclass:: spead2::recv::chunk_stream_group_config
+   :members:
+
+.. doxygenclass:: spead2::recv::chunk_stream_group
+   :members:
+
+.. doxygenclass:: spead2::recv::chunk_stream_group_member
+   :members:
+
+Ringbuffer convenience API
+--------------------------
+.. doxygenclass:: spead2::recv::chunk_stream_ring_group
diff --git a/doc/cpp.rst b/doc/cpp.rst
index 976c6cd06..236ed96cd 100644
--- a/doc/cpp.rst
+++ b/doc/cpp.rst
@@ -40,3 +40,4 @@ search path, and you need to set :envvar:`PKG_CONFIG_PATH` to
    cpp-logging
    cpp-ibverbs
    cpp-recv-chunk
+   cpp-recv-chunk-group
diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index fda9c9b44..8d3292474 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -43,10 +43,15 @@ class chunk_stream_group_config
     /// Default value for @ref set_max_chunks
     static constexpr std::size_t default_max_chunks = chunk_stream_config::default_max_chunks;
 
+    /**
+     * Eviction mode when it is necessary to advance the group window. See the
+     * @verbatim embed:rst:inline :doc:`overview <recv-chunk-group>` @endverbatim
+     * for more details.
+     */
     enum class eviction_mode
     {
-        LOSSY,
-        LOSSLESS
+        LOSSY,     ///< force streams to release incomplete chunks
+        LOSSLESS   ///< a chunk will only be marked ready when all streams have marked it ready
     };
 
 private:
@@ -67,19 +72,7 @@ class chunk_stream_group_config
     /// Return the maximum number of chunks that can be live at the same time.
     std::size_t get_max_chunks() const { return max_chunks; }
 
-    /**
-     * Set chunk eviction mode. When set to @ref eviction_mode::LOSSLESS, a
-     * chunk will only be marked ready when all streams have marked it ready
-     * (due to either stopping or receiving newer data). This is recommended
-     * when the individual streams have lossless transports (such as TCP). If
-     * one of the streams stops receiving data (due to a broken network link),
-     * it will prevent forward progress of the entire group.
-     *
-     * Conversely, using @ref eviction_mode::LOSSY (the default) will allow
-     * progress to continue (with partial data) if one of the streams stops
-     * receiving data, but one stream falling behind another can lead data
-     * being discarded even when the underlying transports are lossless.
-     */
+    /// Set chunk eviction mode. See @ref eviction_mode.
     chunk_stream_group_config &set_eviction_mode(eviction_mode eviction_mode_);
     /// Return the current eviction mode
     eviction_mode get_eviction_mode() const { return eviction_mode_; }
@@ -118,11 +111,11 @@ class chunk_manager_group
 class chunk_stream_group_member;
 
 /**
- * A holder for a collection of streams that share chunks.
+ * A holder for a collection of streams that share chunks. The group owns the
+ * component streams, and takes care of stopping and destroying them when the
+ * group is stopped or destroyed.
 *
 * The public interface must only be called from one thread at a time.
- *
- * @todo write more documentation here
 */
 class chunk_stream_group
 {
@@ -335,9 +328,10 @@ class chunk_stream_group_member : private detail::chunk_stream_state<detail::c
 template<typename DataRingbuffer = ringbuffer<std::unique_ptr<chunk>>,
          typename FreeRingbuffer = ringbuffer<std::unique_ptr<chunk>>>

From 15ac3d742cf3f70e784354683ba52ab6d69fc3a2 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Wed, 28 Jun 2023 11:22:56 +0200
Subject: [PATCH 43/74] More documentation for the chunk stream group C++ API

---
 doc/recv-chunk-group.rst                 |  8 +++-----
 include/spead2/recv_chunk_stream_group.h | 23 ++++++++++++++++++++---
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/doc/recv-chunk-group.rst b/doc/recv-chunk-group.rst
index bef995560..c5f3c1587 100644
--- a/doc/recv-chunk-group.rst
+++ b/doc/recv-chunk-group.rst
@@ -37,15 +37,13 @@ The general flow (in C++) is
 
 1. Create a :cpp:class:`~spead2::recv::chunk_stream_group_config`.
 2. Create a :cpp:class:`~spead2::recv::chunk_stream_group`.
-3. Create multiple instances of
-   :cpp:class:`~spead2::recv::chunk_stream_group_member`, each referencing the
-   group.
+3. Use :cpp:func:`~spead2::recv::chunk_stream_group::emplace_back` to
+   create the streams.
 4. Add readers to the streams.
 5. Process the data.
 6. Optionally, call :cpp:func:`spead2::recv::chunk_stream_group::stop()`
    (otherwise it will be called on destruction).
-7. Destroy the member streams (this must be done before destroying the group).
-8. Destroy the group.
+7. Destroy the group.
 
 In Python the process is similar, although garbage collection replaces
 explicit destruction.
diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index 8d3292474..b489ee1aa 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -218,27 +218,44 @@ class chunk_stream_group
     explicit chunk_stream_group(const chunk_stream_group_config &config);
     virtual ~chunk_stream_group();
 
-    // Add a new stream
+    /// Add a new stream
     chunk_stream_group_member &emplace_back(
         io_service_ref io_service,
         const stream_config &config,
         const chunk_stream_config &chunk_config);
 
-    // Add a new stream, possibly of a subclass
+    /// Add a new stream, possibly of a subclass
     template<typename T, typename... Args>
     T &emplace_back(Args&&... args);
 
-    // Provide vector-like access to the streams
+    /**
+     * @name Vector-like access to the streams.
+     * Iterator invalidation rules are the same as for @c std::vector.
+     * @{
+     */
+    /// Number of streams
     std::size_t size() const { return streams.size(); }
+    /// Whether there are any streams
     bool empty() const { return streams.empty(); }
+    /// Get the stream at a given index
     chunk_stream_group_member &operator[](std::size_t index) { return *streams[index]; }
+    /// Get the stream at a given index
     const chunk_stream_group_member &operator[](std::size_t index) const { return *streams[index]; }
+    /// Get an iterator to the first stream
     iterator begin() noexcept;
+    /// Get an iterator past the last stream
     iterator end() noexcept;
+    /// Get an iterator to the first stream
     const_iterator begin() const noexcept;
+    /// Get a const iterator past the last stream
     const_iterator end() const noexcept;
+    /// Get an iterator to the first stream
     const_iterator cbegin() const noexcept;
+    /// Get a const iterator past the last stream
     const_iterator cend() const noexcept;
+    /**
+     * @}
+     */
 
     /**
      * Stop all streams and release all chunks. This function must not be

From 6e1eb3b0ffe44fdd39c2a9b6ad75af14331ad006 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 09:12:21 +0200
Subject: [PATCH 44/74] Doc that chunk_stream_group presents a vector-like
 interface

---
 include/spead2/recv_chunk_stream_group.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index b489ee1aa..14153a95f 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -115,6 +115,9 @@ class chunk_stream_group_member;
  * component streams, and takes care of stopping and destroying them when the
  * group is stopped or destroyed.
  *
+ * It presents an interface similar to @c std::vector for observing the set
+ * of attached streams.
+ *
  * The public interface must only be called from one thread at a time.
  */
 class chunk_stream_group
@@ -230,7 +233,8 @@ class chunk_stream_group
     /**
      * @name Vector-like access to the streams.
-     * Iterator invalidation rules are the same as for @c std::vector.
+     * Iterator invalidation rules are the same as for @c std::vector i.e.,
+     * modifying the set of streams invalidates iterators.
      * @{
      */
     /// Number of streams

From 19936e1fe0d24f64436db69484c8c072122e315a Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 11:44:10 +0200
Subject: [PATCH 45/74] Remove proactive flushing in chunk_stream_group

I think it could potentially cause back-filled chunks (after a jump) to
be evicted before another stream had a chance to acquire them.

The proactive flushing was previously needed to ensure that if the
network stopped all the streams, the group's chunks were flushed before
the last call to data_ring->remove_producer so that the chunks could be
retrieved. This is now solved in stream_stop_received by detecting the
last stream stopping and doing this flush.

---
 include/spead2/recv_chunk_stream_group.h | 16 ++++++++--
 src/recv_chunk_stream_group.cpp          | 39 +++++++++++++-----------
 2 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index 14153a95f..9483a7436 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -152,6 +152,9 @@ class chunk_stream_group
      */
     std::vector<std::unique_ptr<chunk_stream_group_member>> streams;
 
+    /// Number of elements of streams for which stream_stop_received has not been called.
+    std::size_t live_streams = 0;
+
     /**
      * Obtain the chunk with a given ID.
      *
@@ -173,7 +176,10 @@ class chunk_stream_group
      */
     void release_chunk(chunk *c, std::uint64_t *batch_stats);
 
-    /// Pass a chunk to the user-provided ready function
+    /**
+     * Pass a chunk to the user-provided ready function. The caller is
+     * responsible for ensuring that c->ref_count is zero.
+     */
     void ready_chunk(chunk *c, std::uint64_t *batch_stats);
 
     // Helper classes for implementing iterators
@@ -192,14 +198,17 @@ class chunk_stream_group
 };
 
 protected:
-    /// Called by @ref emplace_back for newly-constructed streams
+    /**
+     * Called by @ref emplace_back for newly-constructed streams. The group's
+     * mutex is held when this is called.
+     */
     virtual void stream_added(chunk_stream_group_member &s) {}
     /**
     * Called when a stream stops (whether from the network or the user).
     *
     * The stream's @c queue_mutex is locked when this is called.
     */
-    virtual void stream_stop_received(chunk_stream_group_member &s) {}
+    virtual void stream_stop_received(chunk_stream_group_member &s);
     /**
     * Called when the user stops (or destroys) a stream.
     *
@@ -276,6 +285,7 @@ T &chunk_stream_group::emplace_back(Args&&... args)
     std::unique_ptr<chunk_stream_group_member> stream(new T(*this, std::forward<Args>(args)...));
     chunk_stream_group_member &ret = *stream;
     streams.push_back(std::move(stream));
+    live_streams++;
     stream_added(ret);
     return ret;
 }
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index 0d3656486..e98fcb5f2 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -136,13 +136,33 @@ void chunk_stream_group::stop()
 {
     /* The mutex is not held while stopping streams, so that callbacks
      * triggered by stopping the streams can take the lock if necessary.
+     *
+     * It's safe to iterate streams without the mutex because this function
+     * is called by the user, so a simultaneous call to emplace_back would
+     * violate the requirement that the user doesn't call the API from more
+     * than one thread at a time.
+     *
+     * The last stream to stop will flush the window (see
+     * stream_stop_received).
      */
     for (const auto &stream : streams)
         stream->stop();
+}
 
+void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s)
+{
     std::lock_guard<std::mutex> lock(mutex);
-    while (!chunks.empty())
-        chunks.flush_head([this](chunk *c) { ready_chunk(c, nullptr); });
+    if (--live_streams == 0)
+    {
+        // Once all the streams have stopped, make all the chunks in the
+        // window available. It's not necessary to check c->ref_count
+        // because no stream can have a reference once they're all stopped.
+        std::uint64_t *batch_stats = s.batch_stats.data();
+        while (!chunks.empty())
+        {
+            chunks.flush_head([this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); });
+        }
+    }
 }
 
 chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats)
@@ -208,22 +228,7 @@ void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats)
 {
     std::lock_guard<std::mutex> lock(mutex);
     if (--c->ref_count == 0)
-    {
-        /* Proactively flush chunks that have been fully released.
-         * This ensures that if the member stream is stopping, we
-         * have a chance to make the chunks ready before we shut
-         * everything down.
-         */
-        while (!chunks.empty())
-        {
-            chunk *c = chunks.get_chunk(chunks.get_head_chunk());
-            if (c && c->ref_count == 0)
-                chunks.flush_head([this, batch_stats](chunk *c2) { ready_chunk(c2, batch_stats); });
-            else
-                break;
-        }
         ready_condition.notify_all();
-    }
 }
 
 chunk_stream_group_member::chunk_stream_group_member(

From 3482abfc7f1d2493b8077e58395f25318dcc451c Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 12:24:29 +0200
Subject: [PATCH 46/74] Simplify flushing in chunk_stream_group::get_chunk

It also allows each chunk to be flushed as soon as it is ready, rather
than waiting for all flushable chunks to be ready before any of them get
flushed (although practically it might not make any difference).
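In outline, the new wait-and-flush loop (taken from the diff below) is:

    while (chunks.get_head_chunk() < std::min(chunks.get_tail_chunk(), target))
    {
        chunk *c = chunks.get_chunk(chunks.get_head_chunk());
        if (c->ref_count == 0)
            chunks.flush_head([this, batch_stats](chunk *c2) { ready_chunk(c2, batch_stats); });
        else
            ready_condition.wait(lock);
    }

so the oldest chunk is handed to the ready callback as soon as its
reference count drops to zero, instead of only after the whole range
up to the target becomes flushable.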
---
 src/recv_chunk_stream_group.cpp | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index e98fcb5f2..97330246e 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -184,24 +184,13 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea
         if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY)
             for (const auto &s : streams)
                 s->async_flush_until(target);
-        std::int64_t to_check = chunks.get_head_chunk();  // next chunk to wait for
-        while (true)
+        while (chunks.get_head_chunk() < std::min(chunks.get_tail_chunk(), target))
         {
-            bool good = true;
-            std::int64_t limit = std::min(chunks.get_tail_chunk(), target);
-            to_check = std::max(chunks.get_head_chunk(), to_check);
-            for (; to_check < limit; to_check++)
-            {
-                chunk *c = chunks.get_chunk(to_check);
-                if (c && c->ref_count > 0)
-                {
-                    good = false;  // Still need to wait longer for this chunk
-                    break;
-                }
-            }
-            if (good)
-                break;
-            ready_condition.wait(lock);
+            chunk *c = chunks.get_chunk(chunks.get_head_chunk());
+            if (c->ref_count == 0)
+                chunks.flush_head([this, batch_stats](chunk *c2) { ready_chunk(c2, batch_stats); });
+            else
+                ready_condition.wait(lock);
         }
     }
 
@@ -211,7 +200,10 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea
         [this, batch_stats](std::int64_t id) {
             return config.get_allocate()(id, batch_stats).release();
         },
-        [this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); }
+        [](chunk *) {
+            // Should be unreachable, as we've done the necessary flushing above
+            assert(false);
+        }
     );
     if (c)
         c->ref_count++;
@@ -220,6 +212,7 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea
 
 void chunk_stream_group::ready_chunk(chunk *c, std::uint64_t *batch_stats)
 {
+    assert(c->ref_count == 0);
     std::unique_ptr<chunk> owned(c);
     config.get_ready()(std::move(owned), batch_stats);
 }

From 2cddd9f774746726fc5106ec6b4709daf5bd2acf Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 12:35:24 +0200
Subject: [PATCH 47/74] Fix corner case in async_flush_until

If the tail timestamp was less than the target, it would empty the
window but not advance the head+tail, so chunks from prior to the
target could still re-enter the window.
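The fix gives the window an explicit flush_until operation (shown in the
diff below) that advances head_chunk and tail_chunk past the target even
when the window has been emptied:

    template<typename F>
    void flush_until(std::int64_t target, const F &ready_chunk)
    {
        while (head_chunk != tail_chunk && head_chunk < target)
            flush_head(ready_chunk);
        if (head_chunk == tail_chunk && head_chunk < target)
            head_chunk = tail_chunk = target;  // don't let old chunks re-enter
    }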
---
 include/spead2/recv_chunk_stream.h | 10 ++++++++++
 src/recv_chunk_stream_group.cpp    |  9 +++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h
index a7ae60748..4bae17f3e 100644
--- a/include/spead2/recv_chunk_stream.h
+++ b/include/spead2/recv_chunk_stream.h
@@ -247,6 +247,16 @@ class chunk_window
             head_pos = 0;  // wrap around the circular buffer
     }
 
+    /// Flush until the head is at least @a target
+    template<typename F>
+    void flush_until(std::int64_t target, const F &ready_chunk)
+    {
+        while (head_chunk != tail_chunk && head_chunk < target)
+            flush_head(ready_chunk);
+        if (head_chunk == tail_chunk && head_chunk < target)
+            head_chunk = tail_chunk = target;
+    }
+
     explicit chunk_window(std::size_t max_chunks);
 
     /**
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index 97330246e..29dcff347 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -246,12 +246,9 @@ void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id)
 {
     post([chunk_id](stream_base &s) {
         chunk_stream_group_member &self = static_cast<chunk_stream_group_member &>(s);
-        while (self.chunks.get_head_chunk() < chunk_id && !self.chunks.empty())
-        {
-            self.chunks.flush_head([&self](chunk *c) {
-                self.group.release_chunk(c, self.batch_stats.data());
-            });
-        }
+        self.chunks.flush_until(chunk_id, [&self](chunk *c) {
+            self.group.release_chunk(c, self.batch_stats.data());
+        });
     });
 }

From afab1f771650ba0d6520bd913aa06faffb725a8f Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 13:59:02 +0200
Subject: [PATCH 48/74] Reduce number of calls to async_flush_until

If multiple streams try to get the same chunk ID from a group at the
same time, it's possible that each of them would have issued an
async_flush_until to all the others, causing O(N^2) total calls.
Improve this by tracking what we flushed to previously.

Additionally, use this information to ensure that newly-added streams
also flush to this point. This fixes a potential deadlock if a new
stream is added once things are already running, and it sees a heap
that's inside the group's window but behind the flush_until point, and
then it never advances further.

---
 include/spead2/recv_chunk_stream_group.h | 35 ++++++++++++++++--------
 src/recv_chunk_stream_group.cpp          |  6 +++-
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index 9483a7436..49153ab23 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -152,6 +152,11 @@ class chunk_stream_group
      */
     std::vector<std::unique_ptr<chunk_stream_group_member>> streams;
 
+    /**
+     * Last value passed to all streams' async_flush_until.
+     */
+    std::int64_t last_flush_until = 0;
+
     /// Number of elements of streams for which stream_stop_received has not been called.
     std::size_t live_streams = 0;
 
@@ -278,18 +283,6 @@ class chunk_stream_group
     virtual void stop();
 };
 
-template<typename T, typename... Args>
-T &chunk_stream_group::emplace_back(Args&&... args)
-{
-    std::lock_guard<std::mutex> lock(mutex);
-    std::unique_ptr<chunk_stream_group_member> stream(new T(*this, std::forward<Args>(args)...));
-    chunk_stream_group_member &ret = *stream;
-    streams.push_back(std::move(stream));
-    live_streams++;
-    stream_added(ret);
-    return ret;
-}
-
 /**
  * Single stream within a group managed by @ref chunk_stream_group.
 */
@@ -390,6 +383,24 @@ class chunk_stream_ring_group
     ~chunk_stream_ring_group();
 };
 
+
+template<typename T, typename... Args>
+T &chunk_stream_group::emplace_back(Args&&... args)
+{
+    std::lock_guard<std::mutex> lock(mutex);
+    std::unique_ptr<chunk_stream_group_member> stream(new T(*this, std::forward<Args>(args)...));
+    chunk_stream_group_member &ret = *stream;
+    streams.push_back(std::move(stream));
+    live_streams++;
+    if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY
+        && last_flush_until > 0)
+    {
+        ret.async_flush_until(last_flush_until);
+    }
+    stream_added(ret);
+    return ret;
+}
+
 template<typename DataRingbuffer, typename FreeRingbuffer>
 chunk_stream_ring_group<DataRingbuffer, FreeRingbuffer>::chunk_stream_ring_group(
     const chunk_stream_group_config &group_config,
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index 29dcff347..7f934d8ca 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -181,9 +181,13 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea
     if (chunk_id >= chunks.get_head_chunk() + std::int64_t(max_chunks))
     {
         std::int64_t target = chunk_id - max_chunks + 1;  // first chunk we don't need to flush
-        if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY)
+        if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY
+            && target > last_flush_until)
+        {
             for (const auto &s : streams)
                 s->async_flush_until(target);
+            last_flush_until = target;
+        }
         while (chunks.get_head_chunk() < std::min(chunks.get_tail_chunk(), target))
         {
             chunk *c = chunks.get_chunk(chunks.get_head_chunk());

From 53927e72ac4273eb9d2997b967c6bebc06fb2989 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 14:20:06 +0200
Subject: [PATCH 49/74] Remove unnecessary reset of head_pos = tail_pos = 0

It's potentially going to hurt cache locality by jumping the pos
pointers away from where activity most recently happened.

---
 include/spead2/recv_chunk_stream.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h
index 4bae17f3e..615bae5db 100644
--- a/include/spead2/recv_chunk_stream.h
+++ b/include/spead2/recv_chunk_stream.h
@@ -303,7 +303,6 @@ class chunk_window
             while (!empty())
                 flush_head(ready_chunk);
             head_chunk = tail_chunk = chunk_id - (max_chunks - 1);
-            head_pos = tail_pos = 0;
         }
         while (chunk_id >= tail_chunk)
         {

From 186dea69f9a3ce52baba82b08d5d41d236766217 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 14:40:00 +0200
Subject: [PATCH 50/74] Add some developer documentation

---
 doc/dev-recv-chunk-group.rst | 49 ++++++++++++++++++++++++++++++++
 doc/dev-recv-destruction.rst | 54 ++++++++++++++++++++++++++++++++++++
 doc/developer.rst            | 12 ++++++++
 doc/index.rst                |  1 +
 4 files changed, 116 insertions(+)
 create mode 100644 doc/dev-recv-chunk-group.rst
 create mode 100644 doc/dev-recv-destruction.rst
 create mode 100644 doc/developer.rst

diff --git a/doc/dev-recv-chunk-group.rst b/doc/dev-recv-chunk-group.rst
new file mode 100644
index 000000000..2c51db0ed
--- /dev/null
+++ b/doc/dev-recv-chunk-group.rst
@@ -0,0 +1,49 @@
+Synchronisation in chunk stream groups
+======================================
+.. cpp:namespace-push:: spead2::recv
+
+For chunk stream groups to achieve the goal of allowing multi-core scaling, it
+is necessary to minimise locking. The implementation achieves this by avoiding
+any packet- or heap-granularity locking, and performing locking only at chunk
+granularity. Chunks are assumed to be large enough that this minimises total
+overhead, although it should be noted that these locks are expected to be
+highly contended and there may be further work possible to reduce the
+overheads.
+
+To avoid the need for heap-level locking, each member stream has its own
+sliding window with pointers to the chunks, so that heaps which fall inside an
+existing chunk can be serviced without locking. However, this causes a problem
+when flushing chunks from the group's window: a stream might still be writing
+to the chunk at the time. Additionally, it might not be possible to allocate a
+new chunk until an old chunk is flushed e.g., if there is a fixed pool of
+chunks rather than dynamic allocation.
+
+Each chunk has a reference count, indicating the number of streams that still
+have the chunk in their window. This reference count is non-atomic since it is
+protected by the group's mutex. When the group wishes to evict a chunk, it
+first needs to wait for the reference count of the head chunk to drop to zero.
+It needs a way to be notified that it should try again, which is provided by a
+condition variable. Using a condition variable (rather than, say, replacing
+the simple reference count with a semaphore) allows the group mutex to be
+dropped while waiting, which prevents the deadlocks that might otherwise occur
+if the mutex was held while waiting and another stream was attempting to lock
+the group mutex to make forward progress.
+
+In lossless eviction mode, this is all that is needed, although it is
+non-trivial to see that this won't deadlock with all the streams sitting in
+the wait loop waiting for other streams to make forward progress. That this
+cannot happen is due to the requirement that the stream's window cannot be
+larger than the group's. Consider the active call to
+:cpp:func:`chunk_stream_group::get_chunk` with the smallest chunk ID. That
+stream is guaranteed to have already readied any chunk due to be evicted from
+the group, and the same is true of any other stream that is waiting in
+:cpp:func:`~chunk_stream_group::get_chunk`, and so forward progress depends
+only on streams that are not blocked in
+:cpp:func:`~chunk_stream_group::get_chunk`.
+
+In lossy eviction mode, we need to make sure that such streams make forward
+progress even if no new packets arrive on them. This is achieved by posting an
+asynchronous callback to all streams requesting them to flush out chunks that
+are now too old.
+
+.. cpp:namespace-pop::
diff --git a/doc/dev-recv-destruction.rst b/doc/dev-recv-destruction.rst
new file mode 100644
index 000000000..3510ac1b4
--- /dev/null
+++ b/doc/dev-recv-destruction.rst
@@ -0,0 +1,54 @@
+Destruction of receive streams
+==============================
+The asynchronous and parallel nature of spead2 makes destroying a receive
+stream a tricky operation: there may be pending asio completion handlers that
+will try to push packets into the stream, leading to a race condition. While
+asio guarantees that closing a socket will cancel any pending asynchronous
+operations on that socket, this doesn't account for cases where the operation
+has already completed but the completion handler is either pending or is
+currently running.
+
+Up to version 3.11, this was handled by a shutdown protocol
+between :cpp:class:`spead2::recv::stream` and
+:cpp:class:`spead2::recv::reader`. The reader was required to notify the
+stream when it had completely shut down, and
+:cpp:func:`spead2::recv::stream::stop` would block until all readers had
+performed this notification (via a semaphore). This protocol was complicated,
+and it relied on the reader being able to make forward progress while the
+thread calling :cpp:func:`~spead2::recv::stream::stop` was blocked.
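+
+As a hypothetical, simplified illustration of the scheme described next, a
+completion handler in the new design keeps only a small shared structure
+alive and revalidates the stream under the lock before touching it
+(``process`` stands in for whatever work the handler does):
+
+.. code-block:: cpp
+
+   void handler(std::shared_ptr<shared_state> shared)
+   {
+       std::lock_guard<std::mutex> lock(shared->queue_mutex);
+       stream_base *self = shared->self;  // null once the stream has stopped
+       if (self)
+           process(*self);                // hypothetical packet-handling work
+       // Dropping `shared` here frees only the small shared structure,
+       // never the stream itself.
+   }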
+
+Newer versions take a different approach based on shared pointers. The ideal
+case would be to have the whole stream always managed by a shared pointer, so
+that a completion handler that interfaces with the stream could keep a copy of
+the shared pointer and thus keep it alive as long as needed. However, that is
+not possible to do in a backwards-compatible way. Instead, a minimal set of
+fields is placed inside a shared pointer, namely:
+
+- The ``queue_mutex``
+- A flag indicating whether the stream has stopped.
+
+For convenience, the flag is encoded as a pointer, which holds either a
+pointer to the stream (if not stopped) or a null pointer (if stopped). Each
+completion handler holds a shared reference to this structure. When it wishes
+to access the stream, it should:
+
+1. Lock the mutex.
+2. Get the pointer back to the stream from the shared structure, aborting if
+   it gets a null pointer.
+3. Manipulate the stream.
+4. Drop the mutex.
+
+This prevents use-after-free errors because the stream cannot be destroyed
+without first stopping, and stopping locks the mutex. Hence, the stream cannot
+disappear asynchronously during step 3. Note that it can, however, stop
+during step 3 if the completion handler causes it to stop.
+
+Using shared pointers in this way can add overhead because atomically
+incrementing and decrementing reference counts can be expensive, particularly
+if it causes cache line migrations between processor cores. To minimise
+reference count manipulation, the :cpp:class:`~spead2::recv::reader` class
+encapsulates this workflow in its
+:cpp:func:`~spead2::recv::reader::bind_handler` member function, which
+provides the facilities to move the shared pointer along a linear chain of
+completion handlers so that the reference count does not need to be
+adjusted.
diff --git a/doc/developer.rst b/doc/developer.rst
new file mode 100644
index 000000000..c2ecbb60a
--- /dev/null
+++ b/doc/developer.rst
@@ -0,0 +1,12 @@
+Developer documentation
+=======================
+
+This section documents internal design decisions that users will generally not
+need to be aware of, although some of it may be useful if you plan to subclass
+the C++ classes to extend functionality.
+
+.. toctree::
+   :maxdepth: 2
+
+   dev-recv-destruction
+   dev-recv-chunk-group
diff --git a/doc/index.rst b/doc/index.rst
index a91affd5d..611117781 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -18,6 +18,7 @@ Contents:
    perf
    tools
    migrate-3
+   developer
    changelog
    license

From 8405e452906d406abe6c1692f29542f342b7b84f Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 17:30:19 +0200
Subject: [PATCH 51/74] Add example C++ program using chunk_stream_ring_group

---
 examples/Makefile.am                  |   5 +-
 examples/recv_chunk_group_example.cpp | 113 ++++++++++++++++++++++++++
 examples/recv_chunk_ring_example.cpp  |   2 -
 3 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 examples/recv_chunk_group_example.cpp

diff --git a/examples/Makefile.am b/examples/Makefile.am
index 22ce1962a..2b00376b9 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright 2016 National Research Foundation (SARAO)
+# Copyright 2016, 2020-2021, 2023 National Research Foundation (SARAO)
 #
 # This program is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
@@ -18,6 +18,7 @@ include $(top_srcdir)/src/Makefile.inc.am
 noinst_PROGRAMS = \
 	recv_chunk_example \
 	recv_chunk_ring_example \
+	recv_chunk_group_example \
 	test_recv \
 	test_send \
 	test_ringbuffer
@@ -33,6 +34,8 @@ recv_chunk_example_SOURCES = recv_chunk_example.cpp
 
 recv_chunk_ring_example_SOURCES = recv_chunk_ring_example.cpp
 
+recv_chunk_group_example_SOURCES = recv_chunk_group_example.cpp
+
 if SPEAD2_USE_CUDA
 
 V_NVCC = $(v_NVCC_@AM_V@)
diff --git a/examples/recv_chunk_group_example.cpp b/examples/recv_chunk_group_example.cpp
new file mode 100644
index 000000000..60565b30c
--- /dev/null
+++ b/examples/recv_chunk_group_example.cpp
@@ -0,0 +1,113 @@
+/* Copyright 2023 National Research Foundation (SARAO)
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ *
+ * This is an example of using the chunk stream group API with ringbuffers.
+ * To test it, run
+ * spead2_send localhost:8888 localhost:8889 --heaps 1000 --heap-size 65536 --rate 10.
+ */
+
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <boost/asio.hpp>
+#include <spead2/common_defines.h>
+#include <spead2/common_memory_allocator.h>
+#include <spead2/common_ringbuffer.h>
+#include <spead2/common_thread_pool.h>
+#include <spead2/recv_chunk_stream.h>
+#include <spead2/recv_chunk_stream_group.h>
+#include <spead2/recv_udp.h>
+
+static constexpr std::size_t heap_payload_size = 65536;
+static constexpr std::size_t heaps_per_chunk = 64;
+static constexpr std::size_t chunk_payload_size = heaps_per_chunk * heap_payload_size;
+
+static void chunk_place(spead2::recv::chunk_place_data *data, std::size_t data_size)
+{
+    // We requested only the heap ID and size
+    auto heap_cnt = data->items[0];
+    auto payload_size = data->items[1];
+    // If the payload size doesn't match, discard the heap (could be descriptors etc).
+    if (payload_size == heap_payload_size)
+    {
+        data->chunk_id = heap_cnt / heaps_per_chunk;
+        data->heap_index = heap_cnt % heaps_per_chunk;
+        data->heap_offset = data->heap_index * heap_payload_size;
+    }
+}
+
+int main()
+{
+    constexpr std::size_t num_streams = 2;
+    constexpr int max_chunks = 4;
+    auto chunk_config = spead2::recv::chunk_stream_config()
+        .set_items({spead2::HEAP_CNT_ID, spead2::HEAP_LENGTH_ID})
+        .set_max_chunks(max_chunks)
+        .set_place(chunk_place);
+    auto stream_config = spead2::recv::stream_config();
+    auto group_config = spead2::recv::chunk_stream_group_config()
+        .set_max_chunks(max_chunks);
+    using chunk_ringbuffer = spead2::ringbuffer<std::unique_ptr<spead2::recv::chunk>>;
+    auto data_ring = std::make_shared<chunk_ringbuffer>(max_chunks);
+    auto free_ring = std::make_shared<chunk_ringbuffer>(max_chunks);
+    auto allocator = std::make_shared<spead2::memory_allocator>();
+
+    spead2::recv::chunk_stream_ring_group<> group(group_config, data_ring, free_ring);
+    spead2::thread_pool workers[num_streams];
+    for (std::size_t i = 0; i < num_streams; i++)
+    {
+        group.emplace_back(workers[i], stream_config, chunk_config);
+    }
+    for (int i = 0; i < max_chunks; i++)
+    {
+        std::unique_ptr<spead2::recv::chunk> chunk{new spead2::recv::chunk};
+        chunk->present = allocator->allocate(heaps_per_chunk, nullptr);
+        chunk->present_size = heaps_per_chunk;
+        chunk->data = allocator->allocate(chunk_payload_size, nullptr);
+        group.add_free_chunk(std::move(chunk));
+    }
+
+    for (std::size_t i = 0; i < num_streams; i++)
+    {
+        boost::asio::ip::udp::endpoint endpoint(boost::asio::ip::address_v4::any(), 8888 + i);
+        group[i].emplace_reader<spead2::recv::udp_reader>(
+            endpoint, spead2::recv::udp_reader::default_max_size, 1024 * 1024);
+    }
+    while (true)
+    {
+        try
+        {
+            auto chunk = data_ring->pop();
+            auto n_present = std::accumulate(
+                chunk->present.get(),
+                chunk->present.get() + chunk->present_size, std::size_t(0));
+            std::cout << "Received chunk " << chunk->chunk_id << " with "
+                << n_present << " / " << heaps_per_chunk << " heaps\n";
+            group.add_free_chunk(std::move(chunk));
+        }
+        catch (spead2::ringbuffer_stopped &)
+        {
+            break;
+        }
+    }
+
+    return 0;
+}
diff --git a/examples/recv_chunk_ring_example.cpp b/examples/recv_chunk_ring_example.cpp
index bcd7afcc6..a44b9f80b 100644
--- a/examples/recv_chunk_ring_example.cpp
+++ b/examples/recv_chunk_ring_example.cpp
@@ -40,8 +40,6 @@ static constexpr std::size_t heap_payload_size = 65536;
 static constexpr std::size_t heaps_per_chunk = 64;
 static constexpr std::size_t chunk_payload_size = heaps_per_chunk * heap_payload_size;
 
-static std::shared_ptr<spead2::memory_allocator> allocator;
-
 static void chunk_place(spead2::recv::chunk_place_data *data, std::size_t data_size)
 {
     // We requested only the heap ID and size

From ac79277b5e307d920e38a421f90a23380f57ceac Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Thu, 29 Jun 2023 17:34:14 +0200
Subject: [PATCH 52/74] Add Python example code for ChunkStreamRingGroup

---
 examples/recv_chunk_group_example.py | 87 ++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100755 examples/recv_chunk_group_example.py

diff --git a/examples/recv_chunk_group_example.py b/examples/recv_chunk_group_example.py
new file mode 100755
index 000000000..27fb4b598
--- /dev/null
+++ b/examples/recv_chunk_group_example.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+
+# Copyright 2023 National Research Foundation (SARAO)
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This is an example of using the chunk stream group receive API with
+# ringbuffers. To test it, run
+# spead2_send localhost:8888 localhost:8889 --heaps 1000 --heap-size 65536 --rate 10
+
+from spead2.numba import intp_to_voidptr
+import spead2.recv
+from spead2.recv.numba import chunk_place_data
+
+import numba
+from numba import types
+import numpy as np
+import scipy
+
+HEAP_PAYLOAD_SIZE = 65536
+HEAPS_PER_CHUNK = 64
+CHUNK_PAYLOAD_SIZE = HEAPS_PER_CHUNK * HEAP_PAYLOAD_SIZE
+
+
+@numba.cfunc(types.void(types.CPointer(chunk_place_data), types.uintp), nopython=True)
+def chunk_place(data_ptr, data_size):
+    data = numba.carray(data_ptr, 1)
+    items = numba.carray(intp_to_voidptr(data[0].items), 2, dtype=np.int64)
+    heap_cnt = items[0]
+    payload_size = items[1]
+    # If the payload size doesn't match, discard the heap (could be descriptors etc).
+    if payload_size == HEAP_PAYLOAD_SIZE:
+        data[0].chunk_id = heap_cnt // HEAPS_PER_CHUNK
+        data[0].heap_index = heap_cnt % HEAPS_PER_CHUNK
+        data[0].heap_offset = data[0].heap_index * HEAP_PAYLOAD_SIZE
+
+
+def main():
+    NUM_STREAMS = 2
+    MAX_CHUNKS = 4
+    place_callback = scipy.LowLevelCallable(
+        chunk_place.ctypes,
+        signature='void (void *, size_t)'
+    )
+    chunk_config = spead2.recv.ChunkStreamConfig(
+        items=[spead2.HEAP_CNT_ID, spead2.HEAP_LENGTH_ID],
+        max_chunks=MAX_CHUNKS,
+        place=place_callback)
+    group_config = spead2.recv.ChunkStreamGroupConfig(max_chunks=MAX_CHUNKS)
+    data_ring = spead2.recv.ChunkRingbuffer(MAX_CHUNKS)
+    free_ring = spead2.recv.ChunkRingbuffer(MAX_CHUNKS)
+    group = spead2.recv.ChunkStreamRingGroup(group_config, data_ring, free_ring)
+    for _ in range(NUM_STREAMS):
+        group.emplace_back(
+            spead2.ThreadPool(),
+            spead2.recv.StreamConfig(),
+            chunk_config
+        )
+    for _ in range(MAX_CHUNKS):
+        chunk = spead2.recv.Chunk(
+            present=np.empty(HEAPS_PER_CHUNK, np.uint8),
+            data=np.empty(CHUNK_PAYLOAD_SIZE, np.uint8)
+        )
+        group.add_free_chunk(chunk)
+    for i in range(NUM_STREAMS):
+        group[i].add_udp_reader(8888 + i, buffer_size=1024 * 1024, bind_hostname='127.0.0.1')
+    for chunk in data_ring:
+        n_present = np.sum(chunk.present)
+        print(
+            f"Received chunk {chunk.chunk_id} with "
+            f"{n_present} / {HEAPS_PER_CHUNK} heaps")
+        group.add_free_chunk(chunk)
+
+
+if __name__ == '__main__':
+    main()

From 18836875ed3ccf609bd68a3483e887af9ea79ebc Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Fri, 30 Jun 2023 10:38:37 +0200
Subject: [PATCH 53/74] Fix a null pointer dereference bug

---
 src/recv_chunk_stream_group.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index 7f934d8ca..129e4376c 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -191,7 +191,7 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea
         while (chunks.get_head_chunk() < std::min(chunks.get_tail_chunk(), target))
         {
             chunk *c = chunks.get_chunk(chunks.get_head_chunk());
-            if (c->ref_count == 0)
+            if (!c || c->ref_count == 0)
                 chunks.flush_head([this, batch_stats](chunk *c2) { ready_chunk(c2, batch_stats); });
             else
                 ready_condition.wait(lock);

From 015db87b0bd511fd98899d818d20b52b6f970b8a Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Fri, 30 Jun 2023 10:49:42 +0200
Subject: [PATCH 54/74] Add `config` property to ChunkStreamRingGroup

Also add the corresponding getter to the C++ API. For consistency, I
also renamed the `group_config` constructor argument to `config` in
Python.

---
 include/spead2/recv_chunk_stream_group.h | 2 ++
 src/py_recv.cpp                          | 4 +++-
 src/spead2/recv/__init__.pyi             | 4 +++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index 49153ab23..5a3ee1aff 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -235,6 +235,8 @@ class chunk_stream_group
     explicit chunk_stream_group(const chunk_stream_group_config &config);
     virtual ~chunk_stream_group();
 
+    const chunk_stream_group_config &get_config() const { return config; }
+
     /// Add a new stream
     chunk_stream_group_member &emplace_back(
         io_service_ref io_service,
diff --git a/src/py_recv.cpp b/src/py_recv.cpp
index b0506c55e..20e012ffc 100644
--- a/src/py_recv.cpp
+++ b/src/py_recv.cpp
@@ -1006,7 +1006,7 @@ py::module register_module(py::module &parent)
         .def(py::init<const chunk_stream_group_config &, std::shared_ptr<chunk_ringbuffer>,
                       std::shared_ptr<chunk_ringbuffer>>(),
-             "group_config"_a,
+             "config"_a,
             "data_ringbuffer"_a.none(false),
             "free_ringbuffer"_a.none(false),
             // Keep the Python ringbuffer objects alive, not just the C++ side.
             // This ensures that the same Python objects can be retrieved
             // from properties.
             py::keep_alive<1, 3>(), py::keep_alive<1, 4>())
+        .def_property_readonly(
+            "config", SPEAD2_PTMF(chunk_stream_ring_group_wrapper, get_config))
         .def(
             "emplace_back",
             [](chunk_stream_ring_group_wrapper &group,
diff --git a/src/spead2/recv/__init__.pyi b/src/spead2/recv/__init__.pyi
index a89658b1f..c7d5c1252 100644
--- a/src/spead2/recv/__init__.pyi
+++ b/src/spead2/recv/__init__.pyi
@@ -283,8 +283,10 @@ class ChunkStreamGroupConfig:
 
 class ChunkStreamRingGroup(ChunkRingPair):
     def __init__(
-        self, group_config: ChunkStreamGroupConfig, data_ringbuffer: _ChunkRingbuffer,
+        self, config: ChunkStreamGroupConfig, data_ringbuffer: _ChunkRingbuffer,
         free_ringbuffer: _ChunkRingbuffer) -> None: ...
+    @property
+    def config(self) -> ChunkStreamGroupConfig: ...
     def emplace_back(
         self, thread_pool: spead2.ThreadPool, config: spead2.StreamConfig,
         chunk_stream_config: spead2.ChunkStreamConfig) -> None: ...

From 64ba802f7eec330e81d225e1f6ce0db0dba23ba2 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Fri, 30 Jun 2023 14:25:48 +0200
Subject: [PATCH 55/74] chunk_stream_group: remove refcounts

Reference counts on chunks led to some corner cases that were probably
buggy and definitely hard to reason about, involving either streams
with empty windows (where the head = tail value still impacted which
chunks could be allocated in future, but no references were held) or
null chunks.

Instead, have the group keep track of the head_chunk of each stream,
so that it can determine when no stream can *ever* request the chunk
we're about to flush (important in lossless eviction mode).
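As a sketch (simplified, with a hypothetical index-based signature; the real
change in the diff below passes the stream itself), the group tracks a
per-stream head chunk and recomputes the minimum under its mutex:

    std::vector<std::int64_t> head_chunks;  // one entry per stream
    std::int64_t min_head_chunk = 0;

    // Called with the group mutex held when a stream advances its head
    void stream_head_updated_unlocked(std::size_t index, std::int64_t head_chunk)
    {
        head_chunks[index] = head_chunk;
        std::int64_t new_min = *std::min_element(head_chunks.begin(), head_chunks.end());
        if (new_min > min_head_chunk)
        {
            min_head_chunk = new_min;
            ready_condition.notify_all();  // eviction may now make progress
        }
    }

A chunk whose ID is below min_head_chunk can never be requested again by
any stream, so it is safe to flush.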
---
 doc/dev-recv-chunk-group.rst             |  20 ++--
 doc/recv-chunk-group.rst                 |   6 +-
 include/spead2/recv_chunk_stream.h       |  68 ++++++++-----
 include/spead2/recv_chunk_stream_group.h |  50 ++++++---
 src/recv_chunk_stream_group.cpp          |  66 +++++++-----
 tests/test_recv_chunk_stream_group.py    | 124 ++++++++++++++---------
 6 files changed, 212 insertions(+), 122 deletions(-)

diff --git a/doc/dev-recv-chunk-group.rst b/doc/dev-recv-chunk-group.rst
index 2c51db0ed..6231ed737 100644
--- a/doc/dev-recv-chunk-group.rst
+++ b/doc/dev-recv-chunk-group.rst
@@ -18,16 +18,16 @@ to the chunk at the time. Additionally, it might not be possible to allocate a
 new chunk until an old chunk is flushed e.g., if there is a fixed pool of
 chunks rather than dynamic allocation.
 
-Each chunk has a reference count, indicating the number of streams that still
-have the chunk in their window. This reference count is non-atomic since it is
-protected by the group's mutex. When the group wishes to evict a chunk, it
-first needs to wait for the reference count of the head chunk to drop to zero.
-It needs a way to be notified that it should try again, which is provided by a
-condition variable. Using a condition variable (rather than, say, replacing
-the simple reference count with a semaphore) allows the group mutex to be
-dropped while waiting, which prevents the deadlocks that might otherwise occur
-if the mutex was held while waiting and another stream was attempting to lock
-the group mutex to make forward progress.
+The group keeps its own copy of the head pointers (oldest chunk) from the
+individual streams, protected by the group mutex rather than the stream
+mutexes. This allows the group to track the oldest chunk that any stream owns
+or may potentially own in future (``min_head_chunk``). When the group wishes to
+evict a chunk, it first needs to wait for ``min_head_chunk`` to become greater
+than the ID of the chunk to be evicted. The wait is achieved using a condition
+variable that is notified whenever ``min_head_chunk`` increases. This allows
+the group mutex to be dropped while waiting, which prevents the deadlocks that
+might otherwise occur if the mutex was held while waiting and another stream
+was attempting to lock the group mutex to make forward progress.
 
 In lossless eviction mode, this is all that is needed, although it is
 non-trivial to see that this won't deadlock with all the streams sitting in
diff --git a/doc/recv-chunk-group.rst b/doc/recv-chunk-group.rst
index c5f3c1587..ddb16ce88 100644
--- a/doc/recv-chunk-group.rst
+++ b/doc/recv-chunk-group.rst
@@ -66,9 +66,9 @@ performance, and thus some care is needed to use it safely.
   as otherwise deadlocks can occur. For example, if they share a thread pool,
   the pool must have at least as many threads as streams. It's recommended
   that each stream has its own single-threaded thread pool.
-- The streams should all be added to the group before adding any readers to
-  the streams. Things will probably work even if this is not done, but the
-  design is sufficiently complicated that it is not advisable.
+- The streams must all be added to the group before adding any readers to
+  the streams. Once the group has received some data, an exception will
+  be thrown if one attempts to add a new stream.
 - The stream ID associated with each chunk will be the stream ID of one of
   the component streams, but it is undefined which one.
 - When the allocate and ready callbacks are invoked, it's not specified which
diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h
index 615bae5db..33a1540ad 100644
--- a/include/spead2/recv_chunk_stream.h
+++ b/include/spead2/recv_chunk_stream.h
@@ -54,14 +54,6 @@ class chunk
     friend class chunk_stream_group;
     template<typename DataRingbuffer, typename FreeRingbuffer> friend class detail::chunk_ring_pair;
 private:
-    /**
-     * Reference count for chunks belonging to stream groups.
-     *
-     * This must only be manipulated from a single thread at a time e.g.
-     * with the group's mutex locked.
-     */
-    std::size_t ref_count = 0;
-
     /// Linked list of chunks to dispose of at shutdown
     std::unique_ptr<chunk> graveyard_next;
@@ -247,14 +239,38 @@ class chunk_window
             head_pos = 0;  // wrap around the circular buffer
     }
 
+    /// Send the oldest chunk to the ready callback
+    template<typename F1, typename F2>
+    void flush_head(const F1 &ready_chunk, const F2 &head_updated)
+    {
+        flush_head(ready_chunk);
+        head_updated(head_chunk);
+    }
+
+    /// Send all the chunks to the ready callback
+    template<typename F1, typename F2>
+    void flush_all(const F1 &ready_chunk, const F2 &head_updated)
+    {
+        if (!empty())
+        {
+            while (!empty())
+                flush_head(ready_chunk);
+            head_updated(head_chunk);
+        }
+    }
+
     /// Flush until the head is at least @a target
-    template<typename F>
-    void flush_until(std::int64_t target, const F &ready_chunk)
+    template<typename F1, typename F2>
+    void flush_until(std::int64_t target, const F1 &ready_chunk, const F2 &head_updated)
     {
-        while (head_chunk != tail_chunk && head_chunk < target)
-            flush_head(ready_chunk);
-        if (head_chunk == tail_chunk && head_chunk < target)
-            head_chunk = tail_chunk = target;
+        if (head_chunk < target)
+        {
+            while (head_chunk != tail_chunk && head_chunk < target)
+                flush_head(ready_chunk);
+            if (head_chunk == tail_chunk && head_chunk < target)
+                head_chunk = tail_chunk = target;
+            head_updated(target);
+        }
     }
 
     explicit chunk_window(std::size_t max_chunks);
 
     /**
@@ -282,11 +298,14 @@ class chunk_window
     * Obtain a pointer to a chunk with ID @a chunk_id.
     *
     * If @a chunk_id is behind the window, returns nullptr. If it is ahead of
-     * the window, the window is advanced using @a ready_chunk and @a allocate_chunk.
+     * the window, the window is advanced using @a allocate_chunk and
+     * @a ready_chunk. If the head_chunk is updated, the new value is passed to
+     * @a head_updated.
     */
-    template<typename F1, typename F2>
+    template<typename F1, typename F2, typename F3>
     chunk *get_chunk(
-        std::int64_t chunk_id, std::uintptr_t stream_id, const F1 &allocate_chunk, const F2 &ready_chunk)
+        std::int64_t chunk_id, std::uintptr_t stream_id,
+        const F1 &allocate_chunk, const F2 &ready_chunk, const F3 &head_updated)
     {
         const std::size_t max_chunks = chunks.size();
         if (chunk_id >= head_chunk)
         {
             /* ...
             * We leave it to the while loop below to actually allocate
             * the chunks.
             */
-            while (!empty())
-                flush_head(ready_chunk);
+            flush_all(ready_chunk, head_updated);
             head_chunk = tail_chunk = chunk_id - (max_chunks - 1);
         }
         while (chunk_id >= tail_chunk)
         {
             if (std::size_t(tail_chunk - head_chunk) == max_chunks)
-                flush_head(ready_chunk);
+                flush_head(ready_chunk, head_updated);
             chunks[tail_pos] = allocate_chunk(tail_chunk);
             if (chunks[tail_pos])
             {
@@ -459,6 +477,7 @@ class chunk_manager_simple
     std::uint64_t *get_batch_stats(chunk_stream_state<chunk_manager_simple> &state) const;
     chunk *allocate_chunk(chunk_stream_state<chunk_manager_simple> &state, std::int64_t chunk_id);
     void ready_chunk(chunk_stream_state<chunk_manager_simple> &state, chunk *c);
+    void head_updated(chunk_stream_state<chunk_manager_simple> &state, std::int64_t head_chunk) {}
 };
 
 /**
@@ -682,8 +701,10 @@ stream_config chunk_stream_state<CM>::adjust_config(const stream_config &config
 template<typename CM>
 void chunk_stream_state<CM>::flush_chunks()
 {
-    while (!chunks.empty())
-        chunks.flush_head([this](chunk *c) { chunk_manager.ready_chunk(*this, c); });
+    chunks.flush_all(
+        [this](chunk *c) { chunk_manager.ready_chunk(*this, c); },
+        [this](std::int64_t head_chunk) { chunk_manager.head_updated(*this, head_chunk); }
+    );
 }
 
 template<typename CM>
@@ -754,7 +775,8 @@ chunk_stream_state<CM>::allocate(std::size_t size, const packet_header &packet)
         chunk_id, stream_id,
         [this](std::int64_t chunk_id) { return chunk_manager.allocate_chunk(*this, chunk_id); },
-        [this](chunk *c) { chunk_manager.ready_chunk(*this, c); }
+        [this](chunk *c) { chunk_manager.ready_chunk(*this, c); },
+        [this](std::int64_t head_chunk) { chunk_manager.head_updated(*this, head_chunk); }
     );
     if (chunk_ptr)
     {
diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index 5a3ee1aff..32eabac79 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -103,7 +104,8 @@ class chunk_manager_group
     std::uint64_t *get_batch_stats(chunk_stream_state<chunk_manager_group> &state) const;
     chunk *allocate_chunk(chunk_stream_state<chunk_manager_group> &state, std::int64_t chunk_id);
-    void ready_chunk(chunk_stream_state<chunk_manager_group> &state, chunk *c);
+    void ready_chunk(chunk_stream_state<chunk_manager_group> &state, chunk *c) {}
+    void head_updated(chunk_stream_state<chunk_manager_group> &state, std::int64_t head_chunk);
 };
 
 } // namespace detail
@@ -152,6 +154,15 @@ class chunk_stream_group
     */
     std::vector<std::unique_ptr<chunk_stream_group_member>> streams;
 
+    /**
+     * Copy of the head chunk ID from each stream. This copy is protected by
+     * the group's mutex rather than the streams'.
+     */
+    std::vector<std::int64_t> head_chunks;
+
+    /// Minimum element of head_chunks
+    std::int64_t min_head_chunk = 0;
+
     /**
     * Last value passed to all streams' async_flush_until.
     */
@@ -172,18 +183,22 @@ class chunk_stream_group
     chunk *get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats);
 
     /**
-     * Decrement chunk reference count.
-     *
-     * If the reference count reaches zero, the chunk is valid to pass to
-     * the ready callback.
-     *
-     * This function is thread-safe.
+     * Update the head_chunk copy for a stream. This version assumes the caller
+     * holds the mutex, and is only used internally.
     */
-    void release_chunk(chunk *c, std::uint64_t *batch_stats);
+    void stream_head_updated_unlocked(chunk_stream_group_member &s, std::int64_t head_chunk);
+
+    /**
+     * Called by a stream to report movement in its head pointer. This function
+     * takes the group mutex.
+     */
+    void stream_head_updated(chunk_stream_group_member &s, std::int64_t head_chunk);
 
     /**
-     * Pass a chunk to the user-provided ready function. The caller is
-     * responsible for ensuring that c->ref_count is zero.
+     * Pass a chunk to the user-provided ready function. The caller is
+     * responsible for ensuring that the chunk is no longer in use.
+     *
+     * The caller must hold the group mutex.
     */
     void ready_chunk(chunk *c, std::uint64_t *batch_stats);
@@ -276,6 +285,7 @@ T &chunk_stream_group::emplace_back(Args&&... args)
 {
     std::lock_guard<std::mutex> lock(mutex);
-    std::unique_ptr<chunk_stream_group_member> stream(new T(*this, std::forward<Args>(args)...));
+    if (chunks.get_tail_chunk() != 0 || last_flush_until != 0)
+    {
+        throw std::runtime_error("Cannot add a stream after group has started receiving data");
+    }
+    std::unique_ptr<chunk_stream_group_member> stream(new T(
+        *this, streams.size(), std::forward<Args>(args)...));
     chunk_stream_group_member &ret = *stream;
     streams.push_back(std::move(stream));
+    head_chunks.push_back(0);
+    min_head_chunk = 0;  // shouldn't be necessary, but just in case
     live_streams++;
     stream_added(ret);
     return ret;
 }
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index 129e4376c..3c07ab2e0 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -21,6 +21,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -76,10 +78,10 @@ chunk *chunk_manager_group::allocate_chunk(
     return group.get_chunk(chunk_id, state.stream_id, state.place_data->batch_stats);
 }
 
-void chunk_manager_group::ready_chunk(chunk_stream_state<chunk_manager_group> &state, chunk *c)
+void chunk_manager_group::head_updated(
+    chunk_stream_state<chunk_manager_group> &state, std::int64_t head_chunk)
 {
-    std::uint64_t *batch_stats = static_cast<chunk_stream_group_member *>(&state)->batch_stats.data();
-    group.release_chunk(c, batch_stats);
+    group.stream_head_updated(static_cast<chunk_stream_group_member &>(state), head_chunk);
 }
 
 } // namespace detail
@@ -152,16 +154,18 @@ void chunk_stream_group::stop()
 
 void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s)
 {
     std::lock_guard<std::mutex> lock(mutex);
+    // Set the head_chunk to the largest possible value, so that this stream
+    // no longer blocks anything.
+    stream_head_updated_unlocked(s, std::numeric_limits<std::int64_t>::max());
     if (--live_streams == 0)
     {
         // Once all the streams have stopped, make all the chunks in the
-        // window available.
std::uint64_t *batch_stats = s.batch_stats.data(); - while (!chunks.empty()) - { - chunks.flush_head([this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); }); - } + chunks.flush_all( + [this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); }, + [](std::int64_t) {} + ); } } @@ -190,9 +194,8 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea } while (chunks.get_head_chunk() < std::min(chunks.get_tail_chunk(), target)) { - chunk *c = chunks.get_chunk(chunks.get_head_chunk()); - if (!c || c->ref_count == 0) - chunks.flush_head([this, batch_stats](chunk *c2) { ready_chunk(c2, batch_stats); }); + if (min_head_chunk > chunks.get_head_chunk()) + chunks.flush_head([this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); }); else ready_condition.wait(lock); } @@ -207,35 +210,48 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea [](chunk *) { // Should be unreachable, as we've done the necessary flushing above assert(false); - } + }, + [](std::int64_t) {} // Don't need notification for head moving ); - if (c) - c->ref_count++; return c; } void chunk_stream_group::ready_chunk(chunk *c, std::uint64_t *batch_stats) { - assert(c->ref_count == 0); + assert(c->chunk_id < min_head_chunk); std::unique_ptr owned(c); config.get_ready()(std::move(owned), batch_stats); } -void chunk_stream_group::release_chunk(chunk *c, std::uint64_t *batch_stats) +void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member &s, std::int64_t head_chunk) +{ + std::size_t stream_index = s.group_index; + std::int64_t old = head_chunks[stream_index]; + head_chunks[stream_index] = head_chunk; + // Update min_head_chunk. We can skip the work if we weren't previously the oldest. + if (min_head_chunk == old) + { + min_head_chunk = *std::min_element(head_chunks.begin(), head_chunks.end()); + if (min_head_chunk != old) + ready_condition.notify_all(); + } +} + +void chunk_stream_group::stream_head_updated(chunk_stream_group_member &s, std::int64_t head_chunk) { std::lock_guard lock(mutex); - if (--c->ref_count == 0) - ready_condition.notify_all(); + stream_head_updated_unlocked(s, head_chunk); } chunk_stream_group_member::chunk_stream_group_member( chunk_stream_group &group, + std::size_t group_index, io_service_ref io_service, const stream_config &config, const chunk_stream_config &chunk_config) : chunk_stream_state(config, chunk_config, detail::chunk_manager_group(group)), stream(std::move(io_service), adjust_config(config)), - group(group) + group(group), group_index(group_index) { if (chunk_config.get_max_chunks() > group.config.get_max_chunks()) throw std::invalid_argument("stream max_chunks must not be larger than group max_chunks"); @@ -250,9 +266,13 @@ void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id) { post([chunk_id](stream_base &s) { chunk_stream_group_member &self = static_cast(s); - self.chunks.flush_until(chunk_id, [&self](chunk *c) { - self.group.release_chunk(c, self.batch_stats.data()); - }); + self.chunks.flush_until( + chunk_id, + [](chunk *) {}, + [&self](std::int64_t head_chunk) { + self.group.stream_head_updated(self, head_chunk); + } + ); }); } diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index 25e431ddb..6aa91034e 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -28,6 +28,8 @@ ) STREAMS = 4 +LOSSY_PARAM = pytest.param(recv.ChunkStreamGroupConfig.EvictionMode.LOSSY, id="lossy") +LOSSLESS_PARAM = 
pytest.param(recv.ChunkStreamGroupConfig.EvictionMode.LOSSLESS, id="lossless") class TestChunkStreamGroupConfig: @@ -75,9 +77,13 @@ def free_ring(self): def queues(self): return [spead2.InprocQueue() for _ in range(STREAMS)] + @pytest.fixture(params=[LOSSY_PARAM, LOSSLESS_PARAM]) + def eviction_mode(self, request): + return request.param + @pytest.fixture - def group(self, data_ring, free_ring, queues): - group_config = recv.ChunkStreamGroupConfig(max_chunks=4) + def group(self, eviction_mode, data_ring, free_ring, queues): + group_config = recv.ChunkStreamGroupConfig(max_chunks=4, eviction_mode=eviction_mode) group = recv.ChunkStreamRingGroup(group_config, data_ring, free_ring) # max_heaps is artificially high to make test_packet_too_old work config = spead2.recv.StreamConfig(max_heaps=128) @@ -101,81 +107,103 @@ def group(self, data_ring, free_ring, queues): def send_stream(self, queues): return send.InprocStream(spead2.ThreadPool(), queues, send.StreamConfig()) - def _send_data(self, send_stream, data, heaps=None): + def _send_data(self, send_stream, data, eviction_mode, heaps=None): """Send the data. To send only a subset of heaps (or to send out of order), pass the indices to skip in `heaps`. """ + lossy = (eviction_mode == recv.ChunkStreamGroupConfig.EvictionMode.LOSSY) data_by_heap = data.reshape(-1, HEAP_PAYLOAD_SIZE) ig = spead2.send.ItemGroup() ig.add_item(0x1000, 'position', 'position in stream', (), format=[('u', 32)]) ig.add_item(0x1001, 'payload', 'payload data', (HEAP_PAYLOAD_SIZE,), dtype=np.uint8) - # Stream groups are impractical to test deterministically, because - # they rely on concurrent forward progress. So we just feed the - # data in slowly enough that we expect heaps provided before a - # sleep to be processed before those after the sleep. - if heaps is None: - heaps = range(len(data_by_heap)) + # In lossy mode the behaviour is inherently non-deterministic. + # We just feed the data in slowly enough that we expect heaps provided + # before a sleep to be processed before those after the sleep. for i in heaps: ig['position'].value = i ig['payload'].value = data_by_heap[i] heap = ig.get_heap(data='all', descriptors='none') send_stream.send_heap(heap, substream_index=i % STREAMS) - time.sleep(0.001) - # Stop all the queues, which should flush everything and stop the - # data ring. 
- for queue in send_stream.queues: - queue.stop() + if lossy: + time.sleep(0.001) - def test_full_in_order(self, group, queues, send_stream, data_ring, free_ring): - """Send all the data, in order.""" - chunks = 20 - rng = np.random.default_rng(seed=1) - data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) - data_by_chunk = data.reshape(chunks, -1) - send_thread = threading.Thread(target=self._send_data, args=(send_stream, data)) - send_thread.start() + def _verify(self, group, data, expected_present): + expected_present = expected_present.reshape(-1, HEAPS_PER_CHUNK) + chunks = len(expected_present) + data_by_heap = data.reshape(chunks, HEAPS_PER_CHUNK, -1) - for i in range(chunks): - chunk = data_ring.get() + for i in range(len(expected_present)): + chunk = group.data_ringbuffer.get() assert chunk.chunk_id == i - np.testing.assert_equal(chunk.present, 1) - np.testing.assert_equal(chunk.data, data_by_chunk[i]) + np.testing.assert_equal(chunk.present, expected_present[i]) + actual_data = chunk.data.reshape(HEAPS_PER_CHUNK, -1) + for j in range(HEAPS_PER_CHUNK): + if expected_present[i, j]: + np.testing.assert_equal(actual_data[j], data_by_heap[i, j]) group.add_free_chunk(chunk) # Stopping all the queues should shut down the data ringbuffer with pytest.raises(spead2.Stopped): - data_ring.get() + group.data_ringbuffer.get() + + def _test_simple(self, group, send_stream, chunks, heaps): + """Send a given set of heaps (in order) and check that they arrive correctly.""" + rng = np.random.default_rng(seed=1) + data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) + data_by_heap = data.reshape(chunks, HEAPS_PER_CHUNK, -1) + + def send(): + self._send_data(send_stream, data, group.config.eviction_mode, heaps) + # Stop all the queues, which should flush everything and stop the + # data ring. 
+ for queue in send_stream.queues: + queue.stop() + + send_thread = threading.Thread(target=send) + send_thread.start() + + expected_present = np.zeros(chunks * HEAPS_PER_CHUNK, np.uint8) + expected_present[heaps] = True + self._verify(group, data, expected_present) send_thread.join() - def test_missing_stream(self, group, queues, send_stream, data_ring, free_ring): + def test_full_in_order(self, group, send_stream): + """Send all the data, in order.""" + chunks = 20 + heaps = list(range(chunks * HEAPS_PER_CHUNK)) + self._test_simple(group, send_stream, chunks, heaps) + + def test_missing_stream(self, group, send_stream): """Skip sending data to one of the streams.""" chunks = 20 + heaps = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS != 2] + self._test_simple(group, send_stream, chunks, heaps) + + @pytest.mark.parametrize("eviction_mode", [LOSSLESS_PARAM]) + def test_lossless_late_stream(self, group, send_stream): + """Send one stream later than the others, to make sure lossless mode really works.""" rng = np.random.default_rng(seed=1) + chunks = 20 data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) - data_by_heap = data.reshape(chunks, HEAPS_PER_CHUNK, -1) - heaps = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS != 2] - send_thread = threading.Thread(target=self._send_data, args=(send_stream, data, heaps)) + heaps1 = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS != 2] + heaps2 = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS == 2] + + def send(): + self._send_data(send_stream, data, group.config.eviction_mode, heaps1) + time.sleep(0.01) + self._send_data(send_stream, data, group.config.eviction_mode, heaps2) + # Stop all the queues, which should flush everything and stop the + # data ring. + for queue in send_stream.queues: + queue.stop() + + send_thread = threading.Thread(target=send) send_thread.start() - expected_present = np.ones(chunks * HEAPS_PER_CHUNK, bool) - expected_present[2::STREAMS] = False - expected_present = expected_present.reshape(chunks, HEAPS_PER_CHUNK) - - for i in range(chunks): - chunk = data_ring.get() - assert chunk.chunk_id == i - np.testing.assert_equal(chunk.present, expected_present[i]) - actual_data = chunk.data.reshape(HEAPS_PER_CHUNK, -1) - for j in range(HEAPS_PER_CHUNK): - if expected_present[i, j]: - np.testing.assert_equal(actual_data[j], data_by_heap[i, j]) - group.add_free_chunk(chunk) - - # Stopping all the queues should shut down the data ringbuffer - with pytest.raises(spead2.Stopped): - data_ring.get() + expected_present = np.ones(chunks * HEAPS_PER_CHUNK, np.uint8) + self._verify(group, data, expected_present) send_thread.join() From 836aa746110743603f68c24ef944b50041517ad8 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 30 Jun 2023 15:01:56 +0200 Subject: [PATCH 56/74] Eliminate live_streams from chunk_stream_group Instead, re-instate proactive flushing in stream_head_updated, which automatically handles the problem that live_streams was meant to solve. 
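
The bookkeeping this converges on is small enough to model directly. The
following is an illustrative Python sketch only, not the library code (the
class and method names are stand-ins): each stream reports its head chunk ID,
the group's head is the minimum over all streams, and any chunk older than
that minimum is flushed proactively.

    class GroupModel:
        def __init__(self, n_streams):
            self.head_chunks = [0] * n_streams  # per-stream head chunk IDs
            self.group_head = 0                 # invariant: min(head_chunks)

        def stream_head_updated(self, stream_index, head_chunk):
            old = self.head_chunks[stream_index]
            self.head_chunks[stream_index] = head_chunk
            if self.group_head == old:   # only the old minimum can advance it
                new_head = min(self.head_chunks)
                while self.group_head < new_head:
                    print(f"chunk {self.group_head} is ready")  # proactive flush
                    self.group_head += 1

    g = GroupModel(2)
    g.stream_head_updated(0, 3)  # group head stays 0: stream 1 is still at 0
    g.stream_head_updated(1, 2)  # minimum is now 2, so chunks 0 and 1 flush

A stopped stream simply reports an effectively infinite head, which is why a
separate count of live streams is no longer needed.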
--- include/spead2/recv_chunk_stream_group.h | 4 ---- src/recv_chunk_stream_group.cpp | 26 +++++++++--------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 32eabac79..4c1e37259 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -168,9 +168,6 @@ class chunk_stream_group */ std::int64_t last_flush_until = 0; - /// Number of elements of stream for which stream_stop_received has not been called. - std::size_t live_streams = 0; - /** * Obtain the chunk with a given ID. * @@ -418,7 +415,6 @@ T &chunk_stream_group::emplace_back(Args&&... args) streams.push_back(std::move(stream)); head_chunks.push_back(0); min_head_chunk = 0; // shouldn't be necessary, but just in case - live_streams++; stream_added(ret); return ret; } diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 3c07ab2e0..65bb3cb56 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -157,16 +157,6 @@ void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s) // Set the head_chunk to the largest possible value, so that this stream // no longer blocks anything. stream_head_updated_unlocked(s, std::numeric_limits::max()); - if (--live_streams == 0) - { - // Once all the streams have stopped, make all the chunks in the - // window available. - std::uint64_t *batch_stats = s.batch_stats.data(); - chunks.flush_all( - [this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); }, - [](std::int64_t) {} - ); - } } chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats) @@ -192,12 +182,9 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea s->async_flush_until(target); last_flush_until = target; } - while (chunks.get_head_chunk() < std::min(chunks.get_tail_chunk(), target)) + while (chunks.get_head_chunk() < target) { - if (min_head_chunk > chunks.get_head_chunk()) - chunks.flush_head([this, batch_stats](chunk *c) { ready_chunk(c, batch_stats); }); - else - ready_condition.wait(lock); + ready_condition.wait(lock); } } @@ -208,7 +195,7 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea return config.get_allocate()(id, batch_stats).release(); }, [](chunk *) { - // Should be unreachable, as we've done the necessary flushing above + // Should be unreachable, as we've ensured this by waiting above assert(false); }, [](std::int64_t) {} // Don't need notification for head moving @@ -233,7 +220,14 @@ void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member { min_head_chunk = *std::min_element(head_chunks.begin(), head_chunks.end()); if (min_head_chunk != old) + { + chunks.flush_until( + min_head_chunk, + [this, &s](chunk *c) { ready_chunk(c, s.batch_stats.data()); }, + [](std::int64_t) {} + ); ready_condition.notify_all(); + } } } From 3703e0d78428ed9a824e011807dd6204f923834b Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 30 Jun 2023 16:16:27 +0200 Subject: [PATCH 57/74] Fix deadlock on group stop in lossless mode There are a few changes here: - Stopping any stream stops the whole group - Stopping the group in lossless eviction mode uses async_flush_until to ensure that all streams flush their data and hence get unblocked. 
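
The user-visible contract can be sketched as follows. This is a hedged sketch
assuming a group built as in the test fixtures in this series; it shows the
guarantee that the new test_unblock_stop exercises, namely that stop()
unblocks a consumer even when the senders never stop their queues.

    import threading

    def consume(group):
        # Iterating the data ringbuffer blocks until a chunk is ready or
        # the ring is stopped; after this fix the loop is guaranteed to
        # terminate once group.stop() is called.
        for chunk in group.data_ringbuffer:
            group.add_free_chunk(chunk)

    consumer = threading.Thread(target=consume, args=(group,))
    consumer.start()
    # ... senders push data, possibly leaving some chunks incomplete ...
    group.stop()     # stops every member stream and both ringbuffers
    consumer.join()  # returns instead of deadlocking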
--- include/spead2/recv_chunk_stream_group.h | 36 ++++++++++------------- src/recv_chunk_stream_group.cpp | 37 ++++++++++++++---------- tests/test_recv_chunk_stream_group.py | 22 ++++++++++++-- 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 4c1e37259..4987b921b 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -226,13 +226,6 @@ class chunk_stream_group * The stream's @c queue_mutex is locked when this is called. */ virtual void stream_stop_received(chunk_stream_group_member &s); - /** - * Called when the user stops (or destroys) a stream. - * - * This is called before the caller actually stops the stream, and without - * the stream's @c queue_mutex. - */ - virtual void stream_pre_stop(chunk_stream_group_member &s) {} public: using iterator = boost::transform_iterator< @@ -346,6 +339,14 @@ class chunk_stream_group_member : private detail::chunk_stream_state::stream_stop_receiv } template -void chunk_stream_ring_group::stream_pre_stop( - chunk_stream_group_member &s) +void chunk_stream_ring_group::stop() { // Shut down the rings so that if the caller is no longer servicing them, it will // not lead to a deadlock during shutdown. this->data_ring->stop(); this->free_ring->stop(); - chunk_stream_group::stream_pre_stop(s); -} - -template -void chunk_stream_ring_group::stop() -{ - // Stopping the first stream should do this anyway, but this ensures - // they're stopped even if there are no streams - this->data_ring->stop(); - this->free_ring->stop(); chunk_stream_group::stop(); this->graveyard.reset(); // Release chunks from the graveyard } diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp index 65bb3cb56..5ce47ae04 100644 --- a/src/recv_chunk_stream_group.cpp +++ b/src/recv_chunk_stream_group.cpp @@ -143,12 +143,20 @@ void chunk_stream_group::stop() * is called by the user, so a simultaneous call to emplace_back would * violate the requirement that the user doesn't call the API from more * than one thread at a time. - * - * The last stream to stop will flush the window (see - * stream_stop_received). */ + if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSLESS) + { + /* Stopping a stream that is currently waiting in get_chunk could + * deadlock. In lossy mode, there are already provisions to guarantee + * forward progress, but in lossless mode we need some help. 
+ */ + for (const auto &stream : streams) + { + stream->async_flush_until(std::numeric_limits::max()); + } + } for (const auto &stream : streams) - stream->stop(); + stream->stop1(); } void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s) @@ -270,16 +278,8 @@ void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id) }); } -void chunk_stream_group_member::stop_received() -{ - stream::stop_received(); - flush_chunks(); - group.stream_stop_received(*this); -} - -void chunk_stream_group_member::stop() +void chunk_stream_group_member::stop1() { - group.stream_pre_stop(*this); { std::lock_guard lock(get_queue_mutex()); flush_chunks(); @@ -287,9 +287,16 @@ void chunk_stream_group_member::stop() stream::stop(); } -chunk_stream_group_member::~chunk_stream_group_member() +void chunk_stream_group_member::stop_received() { - stop(); + stream::stop_received(); + flush_chunks(); + group.stream_stop_received(*this); +} + +void chunk_stream_group_member::stop() +{ + group.stop(); } } // namespace recv diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index 6aa91034e..505bcd003 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -152,7 +152,6 @@ def _test_simple(self, group, send_stream, chunks, heaps): """Send a given set of heaps (in order) and check that they arrive correctly.""" rng = np.random.default_rng(seed=1) data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) - data_by_heap = data.reshape(chunks, HEAPS_PER_CHUNK, -1) def send(): self._send_data(send_stream, data, group.config.eviction_mode, heaps) @@ -185,8 +184,8 @@ def test_missing_stream(self, group, send_stream): @pytest.mark.parametrize("eviction_mode", [LOSSLESS_PARAM]) def test_lossless_late_stream(self, group, send_stream): """Send one stream later than the others, to make sure lossless mode really works.""" - rng = np.random.default_rng(seed=1) chunks = 20 + rng = np.random.default_rng(seed=1) data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) heaps1 = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS != 2] heaps2 = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS == 2] @@ -207,3 +206,22 @@ def send(): self._verify(group, data, expected_present) send_thread.join() + + def test_unblock_stop(self, group, send_stream): + """Stop the group without stopping the queues.""" + chunks = 20 + # Leave one stream half-missing, to really jam things up + n_heaps = chunks * HEAPS_PER_CHUNK + heaps = [i for i in range(n_heaps) if i < n_heaps // 2 or i % STREAMS != 2] + rng = np.random.default_rng(seed=1) + data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) + + self._send_data(send_stream, data, group.config.eviction_mode, heaps) + time.sleep(0.01) # Give it time to consume some of the data + group.stop() + + # We don't care how many chunks we get, as long as the loop + # terminates. + for i, chunk in enumerate(group.data_ringbuffer): + assert chunk.chunk_id == i + group.add_free_chunk(chunk) From e8fa2a762c3a25cd275c76e2852717ba90ac6560 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 30 Jun 2023 17:14:04 +0200 Subject: [PATCH 58/74] Eliminate min_head_chunk It was always the same as chunks.get_head_chunk(), so I just made that explicit. Also some documentation tidying. 
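
The documentation changes in this patch insist on a specific construction
order (all streams before any readers). A minimal Python sketch of that
order, grounded in the Python API documented in the next patch, follows.
It is a sketch under stated assumptions: place_callback stands in for a
user-supplied placement callback, the in-process transport is used for
brevity, and populating the free ring with chunks is omitted.

    import spead2
    import spead2.recv as recv

    N_STREAMS = 4
    group = recv.ChunkStreamRingGroup(
        recv.ChunkStreamGroupConfig(max_chunks=4),
        recv.ChunkRingbuffer(4),  # data ring
        recv.ChunkRingbuffer(4),  # free ring
    )
    # Create all the streams first, each with its own single-threaded
    # thread pool so that no stream can starve another.
    streams = [
        group.emplace_back(
            spead2.ThreadPool(),
            recv.StreamConfig(),
            recv.ChunkStreamConfig(place=place_callback),  # hypothetical callback
        )
        for _ in range(N_STREAMS)
    ]
    # Only once every stream exists may readers be attached.
    queues = [spead2.InprocQueue() for _ in range(N_STREAMS)]
    for stream, queue in zip(streams, queues):
        stream.add_inproc_reader(queue)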
---
 doc/cpp-recv-chunk-group.rst             |  1 +
 doc/dev-recv-chunk-group.rst             | 29 ++++++++++++++--------
 doc/recv-chunk-group.rst                 | 23 +++++++++++--------
 include/spead2/recv_chunk_stream_group.h | 24 ++++++++------------
 src/recv_chunk_stream_group.cpp          | 28 +++++++++++------------
 5 files changed, 56 insertions(+), 49 deletions(-)

diff --git a/doc/cpp-recv-chunk-group.rst b/doc/cpp-recv-chunk-group.rst
index 6b7fa04c4..4de6c171c 100644
--- a/doc/cpp-recv-chunk-group.rst
+++ b/doc/cpp-recv-chunk-group.rst
@@ -16,3 +16,4 @@ C++ API.
 Ringbuffer convenience API
 --------------------------
 .. doxygenclass:: spead2::recv::chunk_stream_ring_group
+   :members:
diff --git a/doc/dev-recv-chunk-group.rst b/doc/dev-recv-chunk-group.rst
index 6231ed737..4e1b0c398 100644
--- a/doc/dev-recv-chunk-group.rst
+++ b/doc/dev-recv-chunk-group.rst
@@ -18,16 +18,18 @@ to the chunk at the time. Additionally, it might not be possible to allocate a
 new chunk until an old chunk is flushed e.g., if there is a fixed pool of
 chunks rather than dynamic allocation.
 
-The group keeps its own copy of the head pointers (oldest heap) from the
+The group keeps its own copy of the head positions (oldest chunk) from the
 individual streams, protected by the group mutex rather than the stream
-mutexes. This allows the group to track the oldest chunk that any stream owns
-or may potentially own in future (``min_head_chunk``). When the group wishes to
-evict a chunk, it first needs to wait for ``min_head_chunk`` to become greater
-than the ID of the chunk to be evicted. The wait is achieved using a condition
-variable that is notified whenever ``min_head_chunk`` increases. This allows
-the group mutex to be dropped while waiting, which prevents the deadlocks that
-might otherwise occur if the mutex was held while waiting and another stream
-was attemping to lock the group mutex to make forward progress.
+mutexes. The group then maintains its head chunk position to match the oldest
+head position of any of the member streams. When the group wishes to
+evict a chunk, it simply needs to wait for all streams to make enough progress
+that the group's head moves past that chunk.
+
+The wait is achieved using a condition variable that is notified whenever the
+head position increases. This allows the group mutex to be dropped while
+waiting, which prevents the deadlocks that might otherwise occur if the mutex
+was held while waiting and another stream was attempting to lock the group mutex
+to make forward progress.
 
 In lossless eviction mode, this is all that is needed, although it is
 non-trivial to see that this won't deadlock with all the streams sitting in
@@ -44,6 +46,13 @@ only on streams that are not blocked in
 In lossy eviction mode, we need to make sure that such streams make forward
 progress even if no new packets arrive on them. This is achieved by posting an
 asynchronous callback to all streams requesting them to flush out chunks that
-are now too old.
+are now too old. The callback will never reach streams that have already
+stopped; we handle this at the time the stream stops, by treating it as having
+a head of ``INT64_MAX``.
+
+While lossless mode is normally allowed to block indefinitely, we do need to
+interrupt things in :cpp:func:`chunk_stream_group::stop`. This is handled
+similarly to lossy eviction mode, where all streams are requested to flush up
+to ``INT64_MAX``.
 
 ..
cpp:namespace-pop:: diff --git a/doc/recv-chunk-group.rst b/doc/recv-chunk-group.rst index ddb16ce88..eb5baeda2 100644 --- a/doc/recv-chunk-group.rst +++ b/doc/recv-chunk-group.rst @@ -1,5 +1,6 @@ Chunking stream groups ====================== +.. cpp:namespace-push:: spead2::recv While the :doc:`recv-chunk` allows for high-bandwidth streams to be received with low overhead, it still has a fundamental scaling limitation: each chunk @@ -35,13 +36,13 @@ down) it prevents the entire group from making forward progress. The general flow (in C++) is -1. Create a :cpp:class:`~spead2::recv::chunk_stream_group_config`. -2. Create a :cpp:class:`~spead2::recv::chunk_stream_group`. -3. Use :cpp:func:`~spead2::recv::chunk_stream_group::emplace_back` to +1. Create a :cpp:class:`chunk_stream_group_config`. +2. Create a :cpp:class:`chunk_stream_group`. +3. Use :cpp:func:`chunk_stream_group::emplace_back` to create the streams. 4. Add readers to the streams. 5. Process the data. -6. Optionally, call :cpp:func:`spead2::recv::chunk_stream_group::stop()` +6. Optionally, call :cpp:func:`chunk_stream_group::stop()` (otherwise it will be called on destruction). 7. Destroy the group. @@ -52,7 +53,7 @@ Ringbuffer convenience API -------------------------- As for standalone chunk streams, there is a simplified API using ringbuffers, which is also the only API available for Python. A -:cpp:class:`~spead2::recv::chunk_stream_ring_group` is a group that allocates +:cpp:class:`chunk_stream_ring_group` is a group that allocates data from one ringbuffer and send ready data to another. The description of :ref:`that api ` largely applies here too. The ringbuffers can be shared between groups. @@ -67,13 +68,15 @@ performance, and thus some care is needed to use it safely. the pool must have at least as many threads as streams. It's recommended that each stream has its own single-threaded thread pool. - The streams must all be added to the group before adding any readers to - the streams. Once data has group has received some data, an exception will - be thrown if one attempts to add a new stream. + the streams. Once a group has received some data, an exception will be thrown + if one attempts to add a new stream. - The stream ID associated with each chunk will be the stream ID of one of the component streams, but it is undefined which one. - When the allocate and ready callbacks are invoked, it's not specified which - stream's batch statistics pointer will be passed. For the ready callback, - the `batch_stats` parameter may also be null (currently this can only happen - during :cpp:func:`spead2::recv::chunk_stream_group::stop`). + stream's batch statistics pointer will be passed. - Two streams must not write to the same bytes of a chunk (in the payload, present array or extra data), as this is undefined behaviour in C++. +- Calling :cpp:func:`~stream::stop` on a member stream will stop the whole + group. + +.. cpp:namespace-pop:: diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h index 4987b921b..2244458b1 100644 --- a/include/spead2/recv_chunk_stream_group.h +++ b/include/spead2/recv_chunk_stream_group.h @@ -120,7 +120,8 @@ class chunk_stream_group_member; * It presents an interface similar to @c std::vector for observing the set * of attached streams. * - * The public interface must only be called from one thread at a time. + * The public interface must only be called from one thread at a time, and + * all streams must be added before any readers are attached to them. 
*/ class chunk_stream_group { @@ -157,12 +158,11 @@ class chunk_stream_group /** * Copy of the head chunk ID from each stream. This copy is protected by * the group's mutex rather than the streams'. + * + * The minimum element must always be equal to @c chunks.get_head_chunk(). */ std::vector head_chunks; - /// Minimum element of head_chunks - std::int64_t min_head_chunk = 0; - /** * Last value passed to all streams' async_flush_until. */ @@ -282,11 +282,7 @@ class chunk_stream_group * @} */ - /** - * Stop all streams and release all chunks. This function must not be - * called concurrently with creating or destroying streams, and no - * new streams should be created after calling this. - */ + /// Stop all streams and release all chunks. virtual void stop(); }; @@ -324,7 +320,7 @@ class chunk_stream_group_member : private detail::chunk_stream_state lock(mutex); + /* Streams should not be requesting chunks older than their heads, and the group + * head is at least as old as any stream head. + */ + assert(chunk_id >= chunks.get_head_chunk()); /* Any chunk old enough be made ready needs to first be released by the * member streams. To do that, we request all the streams to flush, then * wait until it is safe, using the condition variable to wake up @@ -180,7 +184,7 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea * state after a wait. */ const std::size_t max_chunks = config.get_max_chunks(); - if (chunk_id >= chunks.get_head_chunk() + std::int64_t(max_chunks)) + if (std::uint64_t(chunk_id - chunks.get_head_chunk()) >= max_chunks) { std::int64_t target = chunk_id - max_chunks + 1; // first chunk we don't need to flush if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY @@ -213,7 +217,6 @@ chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t strea void chunk_stream_group::ready_chunk(chunk *c, std::uint64_t *batch_stats) { - assert(c->chunk_id < min_head_chunk); std::unique_ptr owned(c); config.get_ready()(std::move(owned), batch_stats); } @@ -223,19 +226,16 @@ void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member std::size_t stream_index = s.group_index; std::int64_t old = head_chunks[stream_index]; head_chunks[stream_index] = head_chunk; - // Update min_head_chunk. We can skip the work if we weren't previously the oldest. - if (min_head_chunk == old) + // Update so that our head chunk is min(head_chunks). We can skip the work + // if we weren't previously the oldest. 
+    if (chunks.get_head_chunk() == old)
     {
-        min_head_chunk = *std::min_element(head_chunks.begin(), head_chunks.end());
-        if (min_head_chunk != old)
-        {
-            chunks.flush_until(
-                min_head_chunk,
-                [this, &s](chunk *c) { ready_chunk(c, s.batch_stats.data()); },
-                [](std::int64_t) {}
-            );
-            ready_condition.notify_all();
-        }
+        auto min_head_chunk = *std::min_element(head_chunks.begin(), head_chunks.end());
+        chunks.flush_until(
+            min_head_chunk,
+            [this, &s](chunk *c) { ready_chunk(c, s.batch_stats.data()); },
+            [this](std::int64_t) { ready_condition.notify_all(); }
+        );
     }
 }

From 08068975d6cbba25833be367336b5754edf4c733 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Mon, 3 Jul 2023 16:08:45 +0200
Subject: [PATCH 59/74] Add py-recv-chunk-group reference documentation

---
 doc/py-recv-chunk-group.rst | 89 +++++++++++++++++++++++++++++++++++++
 doc/py-recv-chunk.rst       | 16 +++----
 doc/py.rst                  |  1 +
 3 files changed, 98 insertions(+), 8 deletions(-)
 create mode 100644 doc/py-recv-chunk-group.rst

diff --git a/doc/py-recv-chunk-group.rst b/doc/py-recv-chunk-group.rst
new file mode 100644
index 000000000..6b57fa68d
--- /dev/null
+++ b/doc/py-recv-chunk-group.rst
@@ -0,0 +1,89 @@
+Chunking stream groups
+======================
+
+For an overview, refer to :doc:`recv-chunk-group`. This page is a reference
+for the Python API. It extends the API for :doc:`chunks <py-recv-chunk>`.
+
+.. py:class:: spead2.recv.ChunkStreamGroupConfig(**kwargs)
+
+   Parameters for a chunk stream group. The configuration options
+   can either be passed to the constructor (as keyword arguments) or set as
+   properties after construction.
+
+   :param int max_chunks:
+      The maximum number of chunks that can be live at the same time.
+   :param EvictionMode eviction_mode:
+      The chunk eviction mode.
+
+   .. py:class:: EvictionMode
+
+      Eviction mode when it is necessary to advance the group window. See
+      the :doc:`overview <recv-chunk-group>` for more details.
+
+      .. py:attribute:: LOSSY
+
+         force streams to release incomplete chunks
+
+      .. py:attribute:: LOSSLESS
+
+         a chunk will only be marked ready when all streams have marked it
+         ready
+
+.. py:class:: spead2.recv.ChunkStreamRingGroup(config, data_ringbuffer, free_ringbuffer)
+
+   Stream group that uses ringbuffers to manage chunks.
+
+   When a fresh chunk is needed, it is retrieved from a ringbuffer of free
+   chunks (the "free ring"). When a chunk is flushed, it is pushed to a "data
+   ring". These may be shared between groups, but both will be stopped as soon
+   as any of the member streams are stopped. The intended use case is
+   parallel groups that are started and stopped together.
+
+   It behaves like a :py:class:`~collections.abc.Sequence` of the contained
+   streams.
+
+   :param config: Group configuration
+   :type config: :py:class:`spead2.recv.ChunkStreamGroupConfig`
+   :param data_ringbuffer: Ringbuffer onto which the completed chunks are placed.
+   :type data_ringbuffer: :py:class:`spead2.recv.ChunkRingbuffer`
+   :param free_ringbuffer: Ringbuffer from which new chunks are obtained.
+   :type free_ringbuffer: :py:class:`spead2.recv.ChunkRingbuffer`
+
+   .. py:attribute:: data_ringbuffer
+
+      The data ringbuffer given to the constructor.
+
+   .. py:attribute:: free_ringbuffer
+
+      The free ringbuffer given to the constructor.
+
+   .. py:method:: add_free_chunk(chunk)
+
+      Add a chunk to the free ringbuffer. This takes care of zeroing out the
+      :py:attr:`.Chunk.present` array, and it will suppress the
+      :exc:`spead2.Stopped` exception if the free ringbuffer has been stopped.
+ + If the free ring is full, it will raise :exc:`spead2.Full` rather than + blocking. The free ringbuffer should be constructed with enough slots that + this does not happen. + + .. py:method:: emplace_back(thread_pool, config, chunk_stream_config) + + Add a new stream. + + :param thread_pool: Thread pool handling the I/O + :type thread_pool: :py:class:`spead2.ThreadPool` + :param config: Stream configuration + :type config: :py:class:`spead2.recv.StreamConfig` + :param chunk_config: Chunking configuration + :type chunk_config: :py:class:`spead2.recv.ChunkStreamConfig` + :rtype: :py:class:`spead2.recv.ChunkStreamGroupMember` + +.. py:class:: spead2.recv.ChunkStreamGroupMember + + A component stream in a :py:class:`~spead2.recv.ChunkStreamRingGroup`. + This class cannot be instantiated directly. Use + :py:meth:`.ChunkStreamRingGroup.emplace_back` instead. + + It provides the same methods for adding readers as + :py:class:`spead2.recv.Stream`. diff --git a/doc/py-recv-chunk.rst b/doc/py-recv-chunk.rst index da3ecf0a2..587d3b65c 100644 --- a/doc/py-recv-chunk.rst +++ b/doc/py-recv-chunk.rst @@ -225,18 +225,18 @@ Reference .. py:attribute:: data_ringbuffer - The data ringbuffer given to the constructor. + The data ringbuffer given to the constructor. .. py:attribute:: free_ringbuffer - The free ringbuffer given to the constructor. + The free ringbuffer given to the constructor. .. py:method:: add_free_chunk(chunk) - Add a chunk to the free ringbuffer. This takes care of zeroing out the - :py:attr:`.Chunk.present` array, and it will suppress the - :exc:`spead2.Stopped` exception if the free ringbuffer has been stopped. + Add a chunk to the free ringbuffer. This takes care of zeroing out the + :py:attr:`.Chunk.present` array, and it will suppress the + :exc:`spead2.Stopped` exception if the free ringbuffer has been stopped. - If the free ring is full, it will raise :exc:`spead2.Full` rather than - blocking. The free ringbuffer should be constructed with enough slots that - this does not happen. + If the free ring is full, it will raise :exc:`spead2.Full` rather than + blocking. The free ringbuffer should be constructed with enough slots that + this does not happen. diff --git a/doc/py.rst b/doc/py.rst index c53a3bd31..fa55c1366 100644 --- a/doc/py.rst +++ b/doc/py.rst @@ -21,3 +21,4 @@ with the C++ backend. py-logging py-ibverbs py-recv-chunk + py-recv-chunk-group From 9608637f452776a7adb4fbbe6eb09bdccfcce4ff Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 3 Jul 2023 16:09:09 +0200 Subject: [PATCH 60/74] Fix some errors in ChunkStreamRingGroup docs --- src/spead2/recv/__init__.pyi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spead2/recv/__init__.pyi b/src/spead2/recv/__init__.pyi index c7d5c1252..5d1bb14a2 100644 --- a/src/spead2/recv/__init__.pyi +++ b/src/spead2/recv/__init__.pyi @@ -289,10 +289,10 @@ class ChunkStreamRingGroup(ChunkRingPair): def config(self) -> ChunkStreamGroupConfig: ... def emplace_back( self, thread_pool: spead2.ThreadPool, config: spead2.StreamConfig, - chunk_stream_config: spead2.ChunkStreamConfig) -> None: ... + chunk_stream_config: spead2.ChunkStreamConfig) -> ChunkStreamGroupMember: ... def __len__(self) -> int: ... - def __getitem__(self, index: int) -> spead2.ChunkStreamGroupMember: ... - def __iter__(self) -> Iterator[spead2.ChunkStreamGroupMember]: ... + def __getitem__(self, index: int) -> ChunkStreamGroupMember: ... + def __iter__(self) -> Iterator[ChunkStreamGroupMember]: ... def stop(self) -> None: ... 
class ChunkStreamGroupMember(_Stream): From af9463cec98966fb2d0f4e9af4fe4d9c58e4cb1a Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 3 Jul 2023 17:51:42 +0200 Subject: [PATCH 61/74] Implement Sequence protocol for ChunkStreamRingGroup It was partially implemented, but now it's fully implemented and registered as a virtual subclass of collections.abc.Sequence. As part of this, the C++ implementation of __iter__ was removed, in favour of the mixin version from collections.abc. The latter is more robust as it won't access undefined memory if the sequence is mutated while iterating. --- src/py_recv.cpp | 24 +++++-- src/spead2/recv/__init__.py | 18 ++++- src/spead2/recv/__init__.pyi | 6 +- tests/test_recv_chunk_stream_group.py | 95 +++++++++++++++++++++++++++ 4 files changed, 130 insertions(+), 13 deletions(-) diff --git a/src/py_recv.cpp b/src/py_recv.cpp index 20e012ffc..26b20a900 100644 --- a/src/py_recv.cpp +++ b/src/py_recv.cpp @@ -1030,8 +1030,10 @@ py::module register_module(py::module &parent) .def("__len__", SPEAD2_PTMF(chunk_stream_ring_group_wrapper, size)) .def( "__getitem__", - [](chunk_stream_ring_group_wrapper &group, std::size_t index) -> chunk_stream_group_member & { - if (index < group.size()) + [](chunk_stream_ring_group_wrapper &group, std::ptrdiff_t index) -> chunk_stream_group_member & { + if (index < 0) + index += group.size(); + if (index >= 0 && std::size_t(index) < group.size()) return group[index]; else throw py::index_error(); @@ -1039,11 +1041,19 @@ py::module register_module(py::module &parent) py::return_value_policy::reference_internal ) .def( - "__iter__", - [](chunk_stream_ring_group_wrapper &group) { - return py::make_iterator(group.begin(), group.end()); - }, - py::keep_alive<0, 1>() // keep the group alive while it is iterated + "__getitem__", + [](chunk_stream_ring_group_wrapper &group, const py::slice &slice) { + py::list out; + std::size_t start, stop, step, length; + if (!slice.compute(group.size(), &start, &stop, &step, &length)) + throw py::error_already_set(); + py::object self = py::cast(group); + for (std::size_t i = 0; i < length; i++) { + out.append(py::cast(group[start], py::return_value_policy::reference_internal, self)); + start += step; + } + return out; + } ) .def("stop", SPEAD2_PTMF(chunk_stream_ring_group_wrapper, stop)); diff --git a/src/spead2/recv/__init__.py b/src/spead2/recv/__init__.py index 78ce41e99..a097bfb4f 100644 --- a/src/spead2/recv/__init__.py +++ b/src/spead2/recv/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2015, 2020-2021 National Research Foundation (SARAO) +# Copyright 2015, 2020-2023 National Research Foundation (SARAO) # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free @@ -54,6 +54,8 @@ bytes, in the order they appeared in the original packet. """ +from collections.abc import Sequence as _Sequence + from spead2._spead2.recv import ( # noqa: F401 Chunk, ChunkRingPair, @@ -62,7 +64,7 @@ ChunkStreamConfig, ChunkStreamGroupConfig, ChunkStreamGroupMember, - ChunkStreamRingGroup, + ChunkStreamRingGroup as _ChunkStreamRingGroup, Heap, IncompleteHeap, RingStreamConfig, @@ -76,3 +78,15 @@ from spead2._spead2.recv import UdpIbvConfig # noqa: F401 except ImportError: pass + +# Ideally we'd inherit from _Sequence, but that gives errors about +# mismatched metaclasses. So instead we copy the mixin methods. 
+class ChunkStreamRingGroup(_ChunkStreamRingGroup): + count = _Sequence.count + index = _Sequence.index + __iter__ = _Sequence.__iter__ + __contains__ = _Sequence.__contains__ + __reversed__ = _Sequence.__reversed__ + + +_Sequence.register(ChunkStreamRingGroup) diff --git a/src/spead2/recv/__init__.pyi b/src/spead2/recv/__init__.pyi index 5d1bb14a2..7cc759965 100644 --- a/src/spead2/recv/__init__.pyi +++ b/src/spead2/recv/__init__.pyi @@ -13,6 +13,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +import collections.abc import enum import socket from typing import Iterator, Iterable, Any, List, Tuple, Sequence, Union, Text, Optional, ClassVar, overload @@ -281,7 +282,7 @@ class ChunkStreamGroupConfig: def __init__(self, *, max_chunks=..., eviction_mode=...) -> None: ... -class ChunkStreamRingGroup(ChunkRingPair): +class ChunkStreamRingGroup(ChunkRingPair, collections.abc.Sequence[ChunkStreamGroupMember]): def __init__( self, config: ChunkStreamGroupConfig, data_ringbuffer: _ChunkRingbuffer, free_ringbuffer: _ChunkRingbuffer) -> None: ... @@ -290,9 +291,6 @@ class ChunkStreamRingGroup(ChunkRingPair): def emplace_back( self, thread_pool: spead2.ThreadPool, config: spead2.StreamConfig, chunk_stream_config: spead2.ChunkStreamConfig) -> ChunkStreamGroupMember: ... - def __len__(self) -> int: ... - def __getitem__(self, index: int) -> ChunkStreamGroupMember: ... - def __iter__(self) -> Iterator[ChunkStreamGroupMember]: ... def stop(self) -> None: ... class ChunkStreamGroupMember(_Stream): diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index 505bcd003..f1dcb95e9 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -13,6 +13,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+import collections.abc import threading import time @@ -56,6 +57,100 @@ def test_eviction_mode(self): assert config.eviction_mode == EvictionMode.LOSSY +class TestChunkStreamRingGroupSequence: + """Test that ChunkStreamRingGroup behaves like a sequence.""" + @pytest.fixture + def config(self): + return spead2.recv.ChunkStreamGroupConfig() + + @pytest.fixture + def data_ring(self): + return spead2.recv.ChunkRingbuffer(4) + + @pytest.fixture + def free_ring(self): + ring = spead2.recv.ChunkRingbuffer(4) + + def make_group(self, n_streams): + group = spead2.recv.ChunkStreamRingGroup( + spead2.recv.ChunkStreamGroupConfig(), + spead2.recv.ChunkRingbuffer(4), + spead2.recv.ChunkRingbuffer(4) + ) + streams = [] + for _ in range(n_streams): + streams.append( + group.emplace_back( + spead2.ThreadPool(), + spead2.recv.StreamConfig(), + spead2.recv.ChunkStreamConfig(place=place_plain_llc) + ) + ) + return group, streams + + def test_len(self): + group, _ = self.make_group(5) + assert len(group) == 5 + + def test_getitem_simple(self): + group, streams = self.make_group(3) + assert group[0] is streams[0] + assert group[1] is streams[1] + assert group[2] is streams[2] + + def test_getitem_wrap(self): + group, streams = self.make_group(3) + assert group[-1] is streams[-1] + assert group[-2] is streams[-2] + assert group[-3] is streams[-3] + + def test_getitem_bad(self): + group, streams = self.make_group(3) + with pytest.raises(IndexError): + group[3] + with pytest.raises(IndexError): + group[-4] + + def test_getitem_slice(self): + group, streams = self.make_group(5) + assert group[1:3] == streams[1:3] + assert group[4:0:-2] == streams[4:0:-2] + assert group[1:-1:2] == streams[1:-1:2] + + def test_iter(self): + group, streams = self.make_group(5) + assert list(group) == streams + + def test_reversed(self): + group, streams = self.make_group(5) + assert list(reversed(group)) == list(reversed(streams)) + + def test_contains(self): + group, streams = self.make_group(2) + assert streams[0] in group + assert streams[1] in group + assert None not in group + + def test_count(self): + group, streams = self.make_group(2) + assert group.count(streams[0]) == 1 + assert group.count(streams[1]) == 1 + assert group.count(group) == 0 + + def test_index(self): + group, streams = self.make_group(2) + assert group.index(streams[0]) == 0 + assert group.index(streams[1]) == 1 + assert group.index(streams[1], 1, 2) == 1 + with pytest.raises(ValueError): + group.index(None) + with pytest.raises(ValueError): + group.index(streams[0], 1) + + def test_registered(self): + assert issubclass(spead2.recv.ChunkStreamRingGroup, collections.abc.Sequence) + + class TestChunkStreamRingGroup: @pytest.fixture def data_ring(self): From e60da411a763d0d379e37aa616dac2767fd6b959 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 4 Jul 2023 08:51:38 +0200 Subject: [PATCH 62/74] Remove some unused code --- tests/test_recv_chunk_stream_group.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index f1dcb95e9..193fcd7f0 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -59,17 +59,6 @@ def test_eviction_mode(self): class TestChunkStreamRingGroupSequence: """Test that ChunkStreamRingGroup behaves like a sequence.""" - @pytest.fixture - def config(self): - return spead2.recv.ChunkStreamGroupConfig() - - @pytest.fixture - def data_ring(self): - return spead2.recv.ChunkRingbuffer(4) - - @pytest.fixture - def 
free_ring(self): - ring = spead2.recv.ChunkRingbuffer(4) def make_group(self, n_streams): group = spead2.recv.ChunkStreamRingGroup( From 07d659c4640f92728252913eaaa70bb0c8fa4d9c Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 4 Jul 2023 09:02:27 +0200 Subject: [PATCH 63/74] Fix a flake8 error --- src/spead2/recv/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spead2/recv/__init__.py b/src/spead2/recv/__init__.py index a097bfb4f..c72ecd1e8 100644 --- a/src/spead2/recv/__init__.py +++ b/src/spead2/recv/__init__.py @@ -79,6 +79,7 @@ except ImportError: pass + # Ideally we'd inherit from _Sequence, but that gives errors about # mismatched metaclasses. So instead we copy the mixin methods. class ChunkStreamRingGroup(_ChunkStreamRingGroup): From d51a8520e229f852079f9c60d76857106bcb0c26 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 4 Jul 2023 10:40:26 +0200 Subject: [PATCH 64/74] Add a unit test to validate a return value policy Ensures that ChunkStreamRingGroup.__getitem__(slice) returns streams that keep the group alive. --- tests/test_recv_chunk_stream_group.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index 193fcd7f0..253220024 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -14,8 +14,10 @@ # along with this program. If not, see . import collections.abc +import gc import threading import time +import weakref import numpy as np import pytest @@ -106,6 +108,16 @@ def test_getitem_slice(self): assert group[4:0:-2] == streams[4:0:-2] assert group[1:-1:2] == streams[1:-1:2] + def test_getitem_slice_gc(self): + """Test that the streams returned by getitem keep the group alive.""" + group = self.make_group(5)[0] + group_weak = weakref.ref(group) + streams = group[1:3] + del group + for i in range(5): # Try extra hard to GC on pypy + gc.collect() + assert group_weak() is not None + def test_iter(self): group, streams = self.make_group(5) assert list(group) == streams From c8664452830ecaab78662e4298f96e029e0e2f64 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 4 Jul 2023 11:38:54 +0200 Subject: [PATCH 65/74] Fix chunk_stream_group deadlock When fast-forwarding the window, get_chunk was not correctly calling head_updated to reflect the fast-forward. --- include/spead2/recv_chunk_stream.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 33a1540ad..512edc34a 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -247,16 +248,19 @@ class chunk_window head_updated(head_chunk); } - /// Send all the chunks to the ready callback + /** + * Send all the chunks to the ready callback. Afterwards, + * the head and tail are both advanced to @a next_chunk. 
+ */ template - void flush_all(const F1 &ready_chunk, const F2 &head_updated) + void flush_all(std::int64_t next_chunk, const F1 &ready_chunk, const F2 &head_updated) { - if (!empty()) - { - while (!empty()) - flush_head(ready_chunk); + std::int64_t orig_head = head_chunk; + while (!empty()) + flush_head(ready_chunk); + head_chunk = tail_chunk = next_chunk; + if (head_chunk != orig_head) head_updated(head_chunk); - } } /// Flush until the head is at least @a target @@ -319,8 +323,7 @@ class chunk_window * We leave it to the while loop below to actually allocate * the chunks. */ - flush_all(ready_chunk, head_updated); - head_chunk = tail_chunk = chunk_id - (max_chunks - 1); + flush_all(chunk_id - (max_chunks - 1), ready_chunk, head_updated); } while (chunk_id >= tail_chunk) { @@ -702,6 +705,7 @@ template void chunk_stream_state::flush_chunks() { chunks.flush_all( + std::numeric_limits::max(), [this](chunk *c) { chunk_manager.ready_chunk(*this, c); }, [this](std::int64_t head_chunk) { chunk_manager.head_updated(*this, head_chunk); } ); From f42ed76ab41fca06e265928413d25915a90f3579 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 4 Jul 2023 15:14:09 +0200 Subject: [PATCH 66/74] Fix error with very large chunk ID A chunk ID close to 2^63 could lead to overflow bugs. Fix it by using unsigned chunk IDs when dealing with head and tail chunk IDs. This does require some more careful handling for determining whether a chunk ID is behind the head, since it's a comparison of a signed and an unsigned value. --- include/spead2/recv_chunk_stream.h | 48 ++++++++------ include/spead2/recv_chunk_stream_group.h | 14 ++--- src/recv_chunk_stream.cpp | 5 +- src/recv_chunk_stream_group.cpp | 26 ++++---- tests/test_recv_chunk_stream_group.py | 80 +++++++++++++++++++++--- 5 files changed, 125 insertions(+), 48 deletions(-) diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h index 512edc34a..daf610cb0 100644 --- a/include/spead2/recv_chunk_stream.h +++ b/include/spead2/recv_chunk_stream.h @@ -214,13 +214,16 @@ namespace detail /** * Sliding window of chunk pointers. + * + * @internal The chunk IDs are kept as unsigned values, so that the tail can + * be larger than any actual chunk ID. */ class chunk_window { private: /// Circular buffer of chunks under construction. std::vector chunks; - std::int64_t head_chunk = 0, tail_chunk = 0; ///< chunk IDs of valid chunk range + std::uint64_t head_chunk = 0, tail_chunk = 0; ///< chunk IDs of valid chunk range std::size_t head_pos = 0, tail_pos = 0; ///< Positions corresponding to @ref head and @ref tail in @ref chunks public: @@ -253,9 +256,9 @@ class chunk_window * the head and tail are both advanced to @a next_chunk. 
From f42ed76ab41fca06e265928413d25915a90f3579 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Tue, 4 Jul 2023 15:14:09 +0200
Subject: [PATCH 66/74] Fix error with very large chunk ID

A chunk ID close to 2^63 could lead to overflow bugs. Fix it by using
unsigned chunk IDs when dealing with head and tail chunk IDs. This does
require some more careful handling for determining whether a chunk ID
is behind the head, since it's a comparison of a signed and an unsigned
value.
---
 include/spead2/recv_chunk_stream.h       | 48 ++++++++------
 include/spead2/recv_chunk_stream_group.h | 14 ++---
 src/recv_chunk_stream.cpp                |  5 +-
 src/recv_chunk_stream_group.cpp          | 26 ++++----
 tests/test_recv_chunk_stream_group.py    | 80 +++++++++++++++++++++---
 5 files changed, 125 insertions(+), 48 deletions(-)

diff --git a/include/spead2/recv_chunk_stream.h b/include/spead2/recv_chunk_stream.h
index 512edc34a..daf610cb0 100644
--- a/include/spead2/recv_chunk_stream.h
+++ b/include/spead2/recv_chunk_stream.h
@@ -214,13 +214,16 @@ namespace detail
 
 /**
  * Sliding window of chunk pointers.
+ *
+ * @internal The chunk IDs are kept as unsigned values, so that the tail can
+ * be larger than any actual chunk ID.
  */
 class chunk_window
 {
 private:
     /// Circular buffer of chunks under construction.
     std::vector<chunk *> chunks;
-    std::int64_t head_chunk = 0, tail_chunk = 0;   ///< chunk IDs of valid chunk range
+    std::uint64_t head_chunk = 0, tail_chunk = 0;  ///< chunk IDs of valid chunk range
     std::size_t head_pos = 0, tail_pos = 0;  ///< Positions corresponding to @ref head and @ref tail in @ref chunks
 
 public:
@@ -253,9 +256,9 @@ class chunk_window
      * the head and tail are both advanced to @a next_chunk.
      */
     template<typename F1, typename F2>
-    void flush_all(std::int64_t next_chunk, const F1 &ready_chunk, const F2 &head_updated)
+    void flush_all(std::uint64_t next_chunk, const F1 &ready_chunk, const F2 &head_updated)
     {
-        std::int64_t orig_head = head_chunk;
+        std::uint64_t orig_head = head_chunk;
         while (!empty())
             flush_head(ready_chunk);
         head_chunk = tail_chunk = next_chunk;
@@ -265,13 +268,13 @@ class chunk_window
     /// Flush until the head is at least @a target
     template<typename F1, typename F2>
-    void flush_until(std::int64_t target, const F1 &ready_chunk, const F2 &head_updated)
+    void flush_until(std::uint64_t target, const F1 &ready_chunk, const F2 &head_updated)
     {
         if (head_chunk < target)
         {
             while (head_chunk != tail_chunk && head_chunk < target)
                 flush_head(ready_chunk);
-            if (head_chunk == tail_chunk && head_chunk < target)
+            if (head_chunk < target)
                 head_chunk = tail_chunk = target;
             head_updated(target);
         }
@@ -284,7 +287,7 @@ class chunk_window
      *
     * If @a chunk_id falls outside the window, returns nullptr.
      */
-    chunk *get_chunk(std::int64_t chunk_id) const
+    chunk *get_chunk(std::uint64_t chunk_id) const
     {
         if (chunk_id >= head_chunk && chunk_id < tail_chunk)
         {
@@ -308,15 +311,17 @@ class chunk_window
      */
     template<typename F1, typename F2, typename F3>
     chunk *get_chunk(
-        std::int64_t chunk_id, std::uintptr_t stream_id,
+        std::uint64_t chunk_id, std::uintptr_t stream_id,
         const F1 &allocate_chunk, const F2 &ready_chunk, const F3 &head_updated)
     {
+        // chunk_id must be a valid int64_t
+        assert(chunk_id <= std::uint64_t(std::numeric_limits<std::int64_t>::max()));
         const std::size_t max_chunks = chunks.size();
         if (chunk_id >= head_chunk)
         {
             // We've moved beyond the end of our current window, and need to
             // allocate fresh chunks.
-            if (chunk_id >= tail_chunk + std::int64_t(max_chunks))
+            if (chunk_id >= tail_chunk && chunk_id - tail_chunk >= max_chunks)
             {
                 /* We've jumped ahead so far that the entire current window
                  * is stale. Flush it all and fast-forward to the new window.
                  */
                 flush_all(chunk_id - (max_chunks - 1), ready_chunk, head_updated);
             }
             while (chunk_id >= tail_chunk)
             {
-                if (std::size_t(tail_chunk - head_chunk) == max_chunks)
+                if (tail_chunk - head_chunk == max_chunks)
                     flush_head(ready_chunk, head_updated);
                 chunks[tail_pos] = allocate_chunk(tail_chunk);
                 if (chunks[tail_pos])
@@ -350,8 +355,8 @@ class chunk_window
         return nullptr;
     }
 
-    std::int64_t get_head_chunk() const { return head_chunk; }
-    std::int64_t get_tail_chunk() const { return tail_chunk; }
+    std::uint64_t get_head_chunk() const { return head_chunk; }
+    std::uint64_t get_tail_chunk() const { return tail_chunk; }
     bool empty() const { return head_chunk == tail_chunk; }
 };
 
@@ -395,8 +400,13 @@ class chunk_stream_state_base
     void do_heap_ready(live_heap &&lh);
 
 protected:
-    std::int64_t get_head_chunk() const { return chunks.get_head_chunk(); }
-    std::int64_t get_tail_chunk() const { return chunks.get_tail_chunk(); }
+    std::uint64_t get_head_chunk() const { return chunks.get_head_chunk(); }
+    std::uint64_t get_tail_chunk() const { return chunks.get_tail_chunk(); }
+    bool chunk_too_old(std::int64_t chunk_id) const
+    {
+        // Need to check against 0 explicitly to avoid signed/unsigned mixup
+        return chunk_id < 0 || std::uint64_t(chunk_id) < chunks.get_head_chunk();
+    }
 
 public:
     /// Constructor
@@ -480,7 +490,7 @@ class chunk_manager_simple
     std::uint64_t *get_batch_stats(chunk_stream_state<chunk_manager_simple> &state) const;
     chunk *allocate_chunk(chunk_stream_state<chunk_manager_simple> &state, std::int64_t chunk_id);
     void ready_chunk(chunk_stream_state<chunk_manager_simple> &state, chunk *c);
-    void head_updated(chunk_stream_state<chunk_manager_simple> &state, std::int64_t head_chunk) {}
+    void head_updated(chunk_stream_state<chunk_manager_simple> &state, std::uint64_t head_chunk) {}
 };
 
@@ -705,9 +715,9 @@ template<typename CM>
 void chunk_stream_state<CM>::flush_chunks()
 {
     chunks.flush_all(
-        std::numeric_limits<std::int64_t>::max(),
+        std::numeric_limits<std::uint64_t>::max(),
         [this](chunk *c) { chunk_manager.ready_chunk(*this, c); },
-        [this](std::int64_t head_chunk) { chunk_manager.head_updated(*this, head_chunk); }
+        [this](std::uint64_t head_chunk) { chunk_manager.head_updated(*this, head_chunk); }
     );
 }
 
@@ -763,8 +773,8 @@ chunk_stream_state<CM>::allocate(std::size_t size, const packet_header &packet)
     place_data->extra_offset = 0;
     place_data->extra_size = 0;
     chunk_config.get_place()(place_data, sizeof(*place_data));
-    auto chunk_id = place_data->chunk_id;
-    if (chunk_id < get_head_chunk())
+    std::int64_t chunk_id = place_data->chunk_id;
+    if (chunk_too_old(chunk_id))
     {
         // We don't want this heap.
         metadata.chunk_id = -1;
@@ -780,7 +790,7 @@ chunk_stream_state<CM>::allocate(std::size_t size, const packet_header &packet)
             stream_id,
             [this](std::int64_t chunk_id) { return chunk_manager.allocate_chunk(*this, chunk_id); },
             [this](chunk *c) { chunk_manager.ready_chunk(*this, c); },
-            [this](std::int64_t head_chunk) { chunk_manager.head_updated(*this, head_chunk); }
+            [this](std::uint64_t head_chunk) { chunk_manager.head_updated(*this, head_chunk); }
         );
         if (chunk_ptr)
         {
diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index 2244458b1..ee5644a5f 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -105,7 +105,7 @@ class chunk_manager_group
     std::uint64_t *get_batch_stats(chunk_stream_state<chunk_manager_group> &state) const;
     chunk *allocate_chunk(chunk_stream_state<chunk_manager_group> &state, std::int64_t chunk_id);
     void ready_chunk(chunk_stream_state<chunk_manager_group> &state, chunk *c) {}
-    void head_updated(chunk_stream_state<chunk_manager_group> &state, std::int64_t head_chunk);
+    void head_updated(chunk_stream_state<chunk_manager_group> &state, std::uint64_t head_chunk);
 };
 
 } // namespace detail
@@ -161,12 +161,12 @@ class chunk_stream_group
      *
      * The minimum element must always be equal to @c chunks.get_head_chunk().
      */
-    std::vector<std::int64_t> head_chunks;
+    std::vector<std::uint64_t> head_chunks;
 
     /**
      * Last value passed to all streams' async_flush_until.
     */
-    std::int64_t last_flush_until = 0;
+    std::uint64_t last_flush_until = 0;
 
     /**
      * Obtain the chunk with a given ID.
      *
      * This function is thread-safe.
      */
-    chunk *get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats);
+    chunk *get_chunk(std::uint64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats);
 
     /**
      * Update the head_chunk copy for a stream. This version assumes the caller takes
      * the mutex, and is only used internally.
      */
-    void stream_head_updated_unlocked(chunk_stream_group_member &s, std::int64_t head_chunk);
+    void stream_head_updated_unlocked(chunk_stream_group_member &s, std::uint64_t head_chunk);
 
     /**
      * Called by a stream to report movement in its head pointer. This function
      * takes the group mutex.
      */
-    void stream_head_updated(chunk_stream_group_member &s, std::int64_t head_chunk);
+    void stream_head_updated(chunk_stream_group_member &s, std::uint64_t head_chunk);
 
     /**
      * Pass a chunk to the user-provided ready function. The caller is
@@ -306,7 +306,7 @@ class chunk_stream_group_member : private detail::chunk_stream_state<detail::chunk_manager_group>
-    void async_flush_until(std::int64_t chunk_id);
+    void async_flush_until(std::uint64_t chunk_id);
diff --git a/src/recv_chunk_stream.cpp b/src/recv_chunk_stream.cpp
--- a/src/recv_chunk_stream.cpp
+++ b/src/recv_chunk_stream.cpp
@@ ... @@
-    if (metadata && metadata->chunk_ptr && metadata->chunk_id >= get_head_chunk()
+    if (metadata && metadata->chunk_ptr
+        && !chunk_too_old(metadata->chunk_id)
         && !get_chunk_config().get_packet_presence_payload_size())
     {
         assert(metadata->heap_index < metadata->chunk_ptr->present_size);
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index c906e109b..53626289d 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -79,7 +79,7 @@ chunk *chunk_manager_group::allocate_chunk(
 }
 
 void chunk_manager_group::head_updated(
-    chunk_stream_state<chunk_manager_group> &state, std::int64_t head_chunk)
+    chunk_stream_state<chunk_manager_group> &state, std::uint64_t head_chunk)
 {
     group.stream_head_updated(static_cast<chunk_stream_group_member &>(state), head_chunk);
 }
@@ -152,7 +152,7 @@ void chunk_stream_group::stop()
      */
     for (const auto &stream : streams)
     {
-        stream->async_flush_until(std::numeric_limits<std::int64_t>::max());
+        stream->async_flush_until(std::numeric_limits<std::uint64_t>::max());
     }
 }
 for (const auto &stream : streams)
@@ -164,10 +164,10 @@ void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s)
 {
     std::lock_guard lock(mutex);
     // Set the head_chunk to the largest possible value, so that this stream
     // no longer blocks anything.
-    stream_head_updated_unlocked(s, std::numeric_limits<std::int64_t>::max());
+    stream_head_updated_unlocked(s, std::numeric_limits<std::uint64_t>::max());
 }
 
-chunk *chunk_stream_group::get_chunk(std::int64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats)
+chunk *chunk_stream_group::get_chunk(std::uint64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats)
 {
     std::unique_lock lock(mutex);
     /* Streams should not be requesting chunks older than their heads, and the group
@@ -184,9 +184,9 @@ chunk *chunk_stream_group::get_chunk(std::uint64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats)
      * state after a wait.
      */
     const std::size_t max_chunks = config.get_max_chunks();
-    if (std::uint64_t(chunk_id - chunks.get_head_chunk()) >= max_chunks)
+    if (chunk_id - chunks.get_head_chunk() >= max_chunks)
     {
-        std::int64_t target = chunk_id - max_chunks + 1;  // first chunk we don't need to flush
+        std::uint64_t target = chunk_id - (max_chunks - 1);  // first chunk we don't need to flush
         if (config.get_eviction_mode() == chunk_stream_group_config::eviction_mode::LOSSY
             && target > last_flush_until)
         {
@@ -210,7 +210,7 @@ chunk *chunk_stream_group::get_chunk(std::uint64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats)
             // Should be unreachable, as we've ensured this by waiting above
             assert(false);
         },
-        [](std::int64_t) {}  // Don't need notification for head moving
+        [](std::uint64_t) {}  // Don't need notification for head moving
     );
     return c;
 }
@@ -221,10 +221,10 @@ void chunk_stream_group::ready_chunk(chunk *c, std::uint64_t *batch_stats)
     config.get_ready()(std::move(owned), batch_stats);
 }
 
-void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member &s, std::int64_t head_chunk)
+void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member &s, std::uint64_t head_chunk)
 {
     std::size_t stream_index = s.group_index;
-    std::int64_t old = head_chunks[stream_index];
+    std::uint64_t old = head_chunks[stream_index];
     head_chunks[stream_index] = head_chunk;
     // Update so that our head chunk is min(head_chunks). We can skip the work
     // if we weren't previously the oldest.
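An aside on the "more careful handling" this patch describes: the hazard is easiest to see outside C++. The following standalone Python sketch (values invented for illustration) uses modular arithmetic to mimic the C++ conversion of a negative std::int64_t to std::uint64_t, which is exactly what the new chunk_too_old() guard in recv_chunk_stream.h defends against:

```python
def as_uint64(x):
    # Mimic the C++ conversion to an unsigned 64-bit value (two's complement)
    return x % 2**64

chunk_id = -1      # e.g. a heap that the place callback rejected
head_chunk = 5     # head of the window, now stored unsigned

# A naive mixed comparison silently wraps: -1 becomes 2**64 - 1
print(as_uint64(chunk_id) < head_chunk)                  # False
# The guarded form tests the sign first, as chunk_too_old() does
print(chunk_id < 0 or as_uint64(chunk_id) < head_chunk)  # True
```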
@@ -234,12 +234,12 @@ void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member
         chunks.flush_until(
             min_head_chunk,
             [this, &s](chunk *c) { ready_chunk(c, s.batch_stats.data()); },
-            [this](std::int64_t) { ready_condition.notify_all(); }
+            [this](std::uint64_t) { ready_condition.notify_all(); }
         );
     }
 }
 
-void chunk_stream_group::stream_head_updated(chunk_stream_group_member &s, std::int64_t head_chunk)
+void chunk_stream_group::stream_head_updated(chunk_stream_group_member &s, std::uint64_t head_chunk)
 {
     std::lock_guard lock(mutex);
     stream_head_updated_unlocked(s, head_chunk);
@@ -264,14 +264,14 @@ void chunk_stream_group_member::heap_ready(live_heap &&lh)
     do_heap_ready(std::move(lh));
 }
 
-void chunk_stream_group_member::async_flush_until(std::int64_t chunk_id)
+void chunk_stream_group_member::async_flush_until(std::uint64_t chunk_id)
 {
     post([chunk_id](stream_base &s) {
         chunk_stream_group_member &self = static_cast<chunk_stream_group_member &>(s);
         self.chunks.flush_until(
             chunk_id,
             [](chunk *) {},
-            [&self](std::int64_t head_chunk) {
+            [&self](std::uint64_t head_chunk) {
                 self.group.stream_head_updated(self, head_chunk);
             }
         );
diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py
index 253220024..c8d6b89aa 100644
--- a/tests/test_recv_chunk_stream_group.py
+++ b/tests/test_recv_chunk_stream_group.py
@@ -14,17 +14,23 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 import collections.abc
+import ctypes
 import gc
 import threading
 import time
 import weakref
 
+import numba
+from numba import types
 import numpy as np
 import pytest
+import scipy
 
 import spead2
 import spead2.recv as recv
 import spead2.send as send
+from spead2.numba import intp_to_voidptr
+from spead2.recv.numba import chunk_place_data
 
 from tests.test_recv_chunk_stream import (
     CHUNK_PAYLOAD_SIZE, HEAP_PAYLOAD_SIZE, HEAPS_PER_CHUNK, place_plain_llc
@@ -35,6 +41,26 @@
 LOSSLESS_PARAM = pytest.param(recv.ChunkStreamGroupConfig.EvictionMode.LOSSLESS, id="lossless")
 
 
+@numba.cfunc(
+    types.void(types.CPointer(chunk_place_data), types.uintp, types.CPointer(types.int64)),
+    nopython=True)
+def place_bias(data_ptr, data_size, user_data_ptr):
+    # Biases the chunk_id by the user parameter
+    data = numba.carray(data_ptr, 1)
+    items = numba.carray(intp_to_voidptr(data[0].items), 2, dtype=np.int64)
+    heap_cnt = items[0]
+    payload_size = items[1]
+    user_data = numba.carray(user_data_ptr, 1)
+    if payload_size == HEAP_PAYLOAD_SIZE:
+        data[0].chunk_id = heap_cnt // HEAPS_PER_CHUNK + user_data[0]
+        data[0].heap_index = heap_cnt % HEAPS_PER_CHUNK
+        data[0].heap_offset = data[0].heap_index * HEAP_PAYLOAD_SIZE
+
+
+place_bias_llc = scipy.LowLevelCallable(
+    place_bias.ctypes, signature='void (void *, size_t, void *)')
+
+
 class TestChunkStreamGroupConfig:
     def test_default_construct(self):
         config = recv.ChunkStreamGroupConfig()
@@ -178,15 +204,23 @@ def eviction_mode(self, request):
         return request.param
 
     @pytest.fixture
-    def group(self, eviction_mode, data_ring, free_ring, queues):
+    def chunk_id_bias(self):
+        return np.array([0], np.int64)
+
+    @pytest.fixture
+    def group(self, eviction_mode, data_ring, free_ring, queues, chunk_id_bias):
         group_config = recv.ChunkStreamGroupConfig(max_chunks=4, eviction_mode=eviction_mode)
         group = recv.ChunkStreamRingGroup(group_config, data_ring, free_ring)
         # max_heaps is artificially high to make test_packet_too_old work
         config = spead2.recv.StreamConfig(max_heaps=128)
+        place_llc = scipy.LowLevelCallable(
+            place_bias.ctypes,
+            user_data=chunk_id_bias.ctypes.data_as(ctypes.c_void_p),
signature='void (void *, size_t, void *)') chunk_stream_config = spead2.recv.ChunkStreamConfig( items=[0x1000, spead2.HEAP_LENGTH_ID], max_chunks=4, - place=place_plain_llc, + place=place_llc, ) for queue in queues: group.emplace_back( @@ -225,14 +259,23 @@ def _send_data(self, send_stream, data, eviction_mode, heaps=None): if lossy: time.sleep(0.001) - def _verify(self, group, data, expected_present): + def _verify(self, group, data, expected_present, chunk_id_bias=0): expected_present = expected_present.reshape(-1, HEAPS_PER_CHUNK) chunks = len(expected_present) data_by_heap = data.reshape(chunks, HEAPS_PER_CHUNK, -1) + def next_real_chunk(): + # Skip padding chunks + while True: + chunk = group.data_ringbuffer.get() + if any(chunk.present): + return chunk + else: + group.add_free_chunk(chunk) + for i in range(len(expected_present)): - chunk = group.data_ringbuffer.get() - assert chunk.chunk_id == i + chunk = next_real_chunk() + assert chunk.chunk_id == i + chunk_id_bias np.testing.assert_equal(chunk.present, expected_present[i]) actual_data = chunk.data.reshape(HEAPS_PER_CHUNK, -1) for j in range(HEAPS_PER_CHUNK): @@ -244,7 +287,7 @@ def _verify(self, group, data, expected_present): with pytest.raises(spead2.Stopped): group.data_ringbuffer.get() - def _test_simple(self, group, send_stream, chunks, heaps): + def _test_simple(self, group, send_stream, chunks, heaps, chunk_id_bias=0): """Send a given set of heaps (in order) and check that they arrive correctly.""" rng = np.random.default_rng(seed=1) data = rng.integers(0, 256, chunks * CHUNK_PAYLOAD_SIZE, np.uint8) @@ -261,7 +304,7 @@ def send(): expected_present = np.zeros(chunks * HEAPS_PER_CHUNK, np.uint8) expected_present[heaps] = True - self._verify(group, data, expected_present) + self._verify(group, data, expected_present, chunk_id_bias) send_thread.join() @@ -277,6 +320,22 @@ def test_missing_stream(self, group, send_stream): heaps = [i for i in range(chunks * HEAPS_PER_CHUNK) if i % STREAMS != 2] self._test_simple(group, send_stream, chunks, heaps) + def test_half_missing_stream(self, group, send_stream): + """Skip sending data to one of the streams after a certain point.""" + chunks = 20 + heaps = [ + i for i in range(chunks * HEAPS_PER_CHUNK) + if i < 7 * HEAPS_PER_CHUNK or i % STREAMS != 2 + ] + self._test_simple(group, send_stream, chunks, heaps) + + def test_missing_chunks(self, group, send_stream): + """Skip sending some whole chunks.""" + chunks = 20 + skip = [1, 6, 7, 13, 14, 15, 16, 17, 18] + heaps = [i for i in range(chunks * HEAPS_PER_CHUNK) if i // HEAPS_PER_CHUNK not in skip] + self._test_simple(group, send_stream, chunks, heaps) + @pytest.mark.parametrize("eviction_mode", [LOSSLESS_PARAM]) def test_lossless_late_stream(self, group, send_stream): """Send one stream later than the others, to make sure lossless mode really works.""" @@ -303,6 +362,13 @@ def send(): send_thread.join() + def test_large_chunk_ids(self, group, send_stream, chunk_id_bias): + chunks = 20 + heaps = list(range(chunks * HEAPS_PER_CHUNK)) + # Ensure that the last chunk will have the maximum possible chunk ID (2**63-1) + chunk_id_bias[0] = 2**63 - chunks + self._test_simple(group, send_stream, chunks, heaps, chunk_id_bias=chunk_id_bias[0]) + def test_unblock_stop(self, group, send_stream): """Stop the group without stopping the queues.""" chunks = 20 From 5276c95c39f02918f07e206082d79acb143098aa Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 4 Jul 2023 15:22:21 +0200 Subject: [PATCH 67/74] Refine test_getitem_slice_gc Apart from fixing a 
spurious flake8 warning, this strengthens the test.
---
 tests/test_recv_chunk_stream_group.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py
index c8d6b89aa..884462ced 100644
--- a/tests/test_recv_chunk_stream_group.py
+++ b/tests/test_recv_chunk_stream_group.py
@@ -144,6 +144,12 @@ def test_getitem_slice_gc(self):
             gc.collect()
         assert group_weak() is not None
 
+        # Now delete the things that are keeping it alive
+        streams.clear()
+        for i in range(5):
+            gc.collect()
+        assert group_weak() is None
+
     def test_iter(self):
         group, streams = self.make_group(5)
         assert list(group) == streams

From 36ef6e02371b22d826f308a415e573f4c86de6f2 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Tue, 4 Jul 2023 16:31:32 +0200
Subject: [PATCH 68/74] Fix missing lock

Classic mistake of initialising the lock_guard as a temporary (which
immediately evaporates) instead of as a variable.
---
 include/spead2/recv_stream.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/spead2/recv_stream.h b/include/spead2/recv_stream.h
index f014a7def..554778ef1 100644
--- a/include/spead2/recv_stream.h
+++ b/include/spead2/recv_stream.h
@@ -700,7 +700,7 @@ class stream_base
 
         void operator()() const
         {
-            std::lock_guard(shared->queue_mutex);
+            std::lock_guard lock(shared->queue_mutex);
             stream_base *self = shared->self;
             if (self)
                 func(*self);

From 66692ec912e8999cbba1ed941e1758d2b9e9d2e9 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Tue, 4 Jul 2023 16:34:37 +0200
Subject: [PATCH 69/74] Remove chunk_stream_group::stream_stop_received

It's still there, but a no-op, since chunk_stream_group_member::stop_received
takes care of advancing the chunk id window to UINT64_MAX.

Also removed stream_head_updated_unlocked since it didn't need to exist
as a separate function (folded into single caller).
---
 include/spead2/recv_chunk_stream_group.h |  8 +-------
 src/recv_chunk_stream_group.cpp          | 17 ++---------------
 2 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/include/spead2/recv_chunk_stream_group.h b/include/spead2/recv_chunk_stream_group.h
index ee5644a5f..50e8db9ee 100644
--- a/include/spead2/recv_chunk_stream_group.h
+++ b/include/spead2/recv_chunk_stream_group.h
@@ -179,12 +179,6 @@ class chunk_stream_group
      */
     chunk *get_chunk(std::uint64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats);
 
-    /**
-     * Update the head_chunk copy for a stream. This version assumes the caller takes
-     * the mutex, and is only used internally.
-     */
-    void stream_head_updated_unlocked(chunk_stream_group_member &s, std::uint64_t head_chunk);
-
     /**
      * Called by a stream to report movement in its head pointer. This function
      * takes the group mutex.
@@ -225,7 +219,7 @@ class chunk_stream_group
      *
      * The stream's @c queue_mutex is locked when this is called.
      */
-    virtual void stream_stop_received(chunk_stream_group_member &s);
+    virtual void stream_stop_received(chunk_stream_group_member &s) {}
 
 public:
     using iterator = boost::transform_iterator<
diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index 53626289d..da8a0cd04 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -159,14 +159,6 @@ void chunk_stream_group::stop()
         stream->stop1();
 }
 
-void chunk_stream_group::stream_stop_received(chunk_stream_group_member &s)
-{
-    std::lock_guard lock(mutex);
-    // Set the head_chunk to the largest possible value, so that this stream
-    // no longer blocks anything.
-    stream_head_updated_unlocked(s, std::numeric_limits<std::uint64_t>::max());
-}
-
 chunk *chunk_stream_group::get_chunk(std::uint64_t chunk_id, std::uintptr_t stream_id, std::uint64_t *batch_stats)
 {
     std::unique_lock lock(mutex);
@@ -221,8 +213,9 @@ void chunk_stream_group::ready_chunk(chunk *c, std::uint64_t *batch_stats)
     config.get_ready()(std::move(owned), batch_stats);
 }
 
-void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member &s, std::uint64_t head_chunk)
+void chunk_stream_group::stream_head_updated(chunk_stream_group_member &s, std::uint64_t head_chunk)
 {
+    std::lock_guard lock(mutex);
     std::size_t stream_index = s.group_index;
     std::uint64_t old = head_chunks[stream_index];
     head_chunks[stream_index] = head_chunk;
     // Update so that our head chunk is min(head_chunks). We can skip the work
     // if we weren't previously the oldest.
@@ -239,12 +232,6 @@ void chunk_stream_group::stream_head_updated_unlocked(chunk_stream_group_member
         );
     }
 }
 
-void chunk_stream_group::stream_head_updated(chunk_stream_group_member &s, std::uint64_t head_chunk)
-{
-    std::lock_guard lock(mutex);
-    stream_head_updated_unlocked(s, head_chunk);
-}
-
 chunk_stream_group_member::chunk_stream_group_member(
     chunk_stream_group &group,
     std::size_t group_index,

From 8bfdac7e3f267e4ac46cd7200e6adcc63d7700c7 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Tue, 4 Jul 2023 16:35:53 +0200
Subject: [PATCH 70/74] Add an additional assert

---
 src/recv_chunk_stream_group.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/recv_chunk_stream_group.cpp b/src/recv_chunk_stream_group.cpp
index da8a0cd04..efe2bc131 100644
--- a/src/recv_chunk_stream_group.cpp
+++ b/src/recv_chunk_stream_group.cpp
@@ -218,6 +218,7 @@ void chunk_stream_group::stream_head_updated(chunk_stream_group_member &s, std::uint64_t head_chunk)
     std::lock_guard lock(mutex);
     std::size_t stream_index = s.group_index;
     std::uint64_t old = head_chunks[stream_index];
+    assert(head_chunk > old);  // head_updated should only be called on forward progress
     head_chunks[stream_index] = head_chunk;
     // Update so that our head chunk is min(head_chunks). We can skip the work
     // if we weren't previously the oldest.

From fbf2f7ae272a6b7d292acb62fa9a9fcda0c3e872 Mon Sep 17 00:00:00 2001
From: Bruce Merry
Date: Tue, 4 Jul 2023 16:38:39 +0200
Subject: [PATCH 71/74] Fix up TestChunkStreamRingGroup::test_missing_chunks

It was failing because the indexing wasn't accounting for the missing
chunks.
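The fix that follows replaces blind sequential indexing with a lookup of which chunk IDs should actually arrive. A small standalone sketch of that indexing idea, with invented test values, may make the diff easier to read:

```python
import numpy as np

# One row per chunk, one flag per heap, as in _verify(); values invented.
expected_present = np.array([[1, 1], [0, 0], [1, 0]], np.uint8)
# Chunks in which no heap was sent are skipped as padding by the test, so the
# loop must compare against only the chunk IDs with at least one heap present:
expected_chunk_ids = np.nonzero(np.any(expected_present, axis=1))[0]
print(expected_chunk_ids)  # [0 2]: chunk 1 was never sent, so don't expect it
```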
--- tests/test_recv_chunk_stream_group.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_recv_chunk_stream_group.py b/tests/test_recv_chunk_stream_group.py index 884462ced..39a852d00 100644 --- a/tests/test_recv_chunk_stream_group.py +++ b/tests/test_recv_chunk_stream_group.py @@ -267,6 +267,7 @@ def _send_data(self, send_stream, data, eviction_mode, heaps=None): def _verify(self, group, data, expected_present, chunk_id_bias=0): expected_present = expected_present.reshape(-1, HEAPS_PER_CHUNK) + expected_chunk_ids = np.nonzero(np.any(expected_present, axis=1))[0] chunks = len(expected_present) data_by_heap = data.reshape(chunks, HEAPS_PER_CHUNK, -1) @@ -279,7 +280,7 @@ def next_real_chunk(): else: group.add_free_chunk(chunk) - for i in range(len(expected_present)): + for i in expected_chunk_ids: chunk = next_real_chunk() assert chunk.chunk_id == i + chunk_id_bias np.testing.assert_equal(chunk.present, expected_present[i]) From 1634bb00d8517f5d86dd4e616eb517255cbf4b93 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Thu, 6 Jul 2023 13:35:19 +0200 Subject: [PATCH 72/74] Fix mypy errors about ChunkStreamRingGroup being abstract Sequence considers __getitem__ and __len__ to be abstract, so they need to be repeated as concrete in the subclass. --- src/spead2/recv/__init__.pyi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/spead2/recv/__init__.pyi b/src/spead2/recv/__init__.pyi index 7cc759965..2de59e3d0 100644 --- a/src/spead2/recv/__init__.pyi +++ b/src/spead2/recv/__init__.pyi @@ -292,6 +292,12 @@ class ChunkStreamRingGroup(ChunkRingPair, collections.abc.Sequence[ChunkStreamGr self, thread_pool: spead2.ThreadPool, config: spead2.StreamConfig, chunk_stream_config: spead2.ChunkStreamConfig) -> ChunkStreamGroupMember: ... def stop(self) -> None: ... + # These are marked abstract in Sequence, so need to be implemented here + @overload + def __getitem__(self, index: int) -> ChunkStreamGroupMember: ... + @overload + def __getitem__(self, index: slice) -> Sequence[ChunkStreamGroupMember]: ... + def __len__(self) -> int: ... class ChunkStreamGroupMember(_Stream): pass From ddb7812f124ac0b1cb2a7fe4ff07d861ab1b0c98 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Sun, 9 Jul 2023 11:31:55 +0200 Subject: [PATCH 73/74] Update changelog for chunk stream groups --- doc/changelog.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index a3155f893..1b863e6ff 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -3,6 +3,9 @@ Changelog .. rubric:: Development version +- Add support for :doc:`recv-chunk-group` to assemble chunks in parallel. +- Simplify the way receive streams shut down. Users should not notice any + change, but custom reader implementations will need to be updated. - Update :meth:`!test_async_flush` and :meth:`!test_async_flush_fail` to keep handles to async tasks, to prevent them being garbage collected too early. 
- Fix a bug where copying a :cpp:class:`spead2::recv::stream_config` would not From aaad4f4bf5841fde91f6e894d5ea10400c72b859 Mon Sep 17 00:00:00 2001 From: Bruce Merry <1963944+bmerry@users.noreply.github.com> Date: Sun, 9 Jul 2023 11:32:45 +0200 Subject: [PATCH 74/74] Bike-shedding on documentation Co-authored-by: James Smith --- doc/recv-chunk-group.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/recv-chunk-group.rst b/doc/recv-chunk-group.rst index eb5baeda2..176f8e5ed 100644 --- a/doc/recv-chunk-group.rst +++ b/doc/recv-chunk-group.rst @@ -21,7 +21,7 @@ Each member stream also has its own sliding window, which can be smaller (but no larger) than the group's window. When the group's window slides forward, the streams' windows are adjusted to ensure they still fit within the group's window. In other words, a stream's window determines how much reordering is -tolerated within a stream, while the group's window determines how out of sync +tolerated within a stream, while the group's window determines how out-of-sync the streams are allowed to become. When desynchronisation does occur, there is a choice of strategies. The default @@ -60,7 +60,7 @@ ringbuffers can be shared between groups. Caveats ------- -This is an advanced API that sacrifices some user-friendlyness for +This is an advanced API that sacrifices some user-friendliness for performance, and thus some care is needed to use it safely. - It is vital that all the streams can make forward progress independently,
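For orientation after this long series, here is a minimal end-to-end sketch of the chunk stream group API that the changelog and documentation patches above refer to, assembled from the pieces exercised in the tests. It is illustrative only: the sizes, the placement callable (place_llc, e.g. the tests' place_plain_llc) and the process() consumer are assumptions, not part of spead2.

```python
import numpy as np
import spead2
import spead2.recv as recv

HEAPS_PER_CHUNK = 64       # assumed chunk layout
HEAP_PAYLOAD_SIZE = 1024   # assumed chunk layout

group_config = recv.ChunkStreamGroupConfig(
    max_chunks=4, eviction_mode=recv.ChunkStreamGroupConfig.EvictionMode.LOSSY)
data_ring = recv.ChunkRingbuffer(4)   # completed chunks arrive here
free_ring = recv.ChunkRingbuffer(4)   # recycled chunks are drawn from here
group = recv.ChunkStreamRingGroup(group_config, data_ring, free_ring)
for _ in range(2):                    # two member streams
    group.emplace_back(
        spead2.ThreadPool(),
        spead2.recv.StreamConfig(),
        spead2.recv.ChunkStreamConfig(
            items=[0x1000, spead2.HEAP_LENGTH_ID], max_chunks=4, place=place_llc))
for _ in range(4):                    # seed the free ring with empty chunks
    group.add_free_chunk(recv.Chunk(
        present=np.zeros(HEAPS_PER_CHUNK, np.uint8),
        data=np.zeros(HEAPS_PER_CHUNK * HEAP_PAYLOAD_SIZE, np.uint8)))
# ... attach a reader to each member stream, e.g. group[i].add_udp_reader(...)
try:
    while True:
        chunk = data_ring.get()       # blocks until a chunk is complete
        process(chunk)                # hypothetical consumer
        group.add_free_chunk(chunk)   # return the chunk for reuse
except spead2.Stopped:
    pass
```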