From 64e6ecf91533fc179a889a74fd3dff3045257dfb Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Wed, 31 Jan 2024 22:13:05 +0100 Subject: [PATCH] Do ignore_cancel_check if table lists do not match --- src/cets_discovery.erl | 25 ++++++++++++++++++------- test/cets_SUITE.erl | 4 ++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/cets_discovery.erl b/src/cets_discovery.erl index 398f62ad..955ac7fb 100644 --- a/src/cets_discovery.erl +++ b/src/cets_discovery.erl @@ -319,8 +319,8 @@ handle_info({ping_result, Node, Result}, State) -> {noreply, handle_ping_result(Node, Result, State)}; handle_info(enter_regular_phase, State) -> {noreply, State#{phase := regular}}; -handle_info({cancel_check, Node}, State) -> - {noreply, handle_cancel_check(Node, State)}; +handle_info({cancel_check, Node, Tables}, State) -> + {noreply, handle_cancel_check(Node, Tables, State)}; handle_info(Msg, State) -> ?LOG_ERROR(#{what => unexpected_info, msg => Msg}), {noreply, State}. @@ -348,17 +348,28 @@ handle_check(State = #{backend_module := Mod, backend_state := BackendState}) -> State#{get_nodes_status := running}. -spec send_cancel_check_to_other_nodes(state()) -> ok. -send_cancel_check_to_other_nodes(State = #{nodes := Nodes}) -> +send_cancel_check_to_other_nodes(State = #{nodes := Nodes, tables := Tables}) -> AliveNodes = lists:sort(nodes()), AliveKnownNodes = ordsets:intersection(Nodes, AliveNodes), - broadcast_to_other_nodes(AliveKnownNodes, {cancel_check, node()}, State). + broadcast_to_other_nodes(AliveKnownNodes, {cancel_check, node(), Tables}, State). --spec handle_cancel_check(node(), state()) -> state(). -handle_cancel_check(_FromNode, State) -> +-spec handle_cancel_check(node(), [cets:table_name()], state()) -> state(). +handle_cancel_check(_FromNode, Tables, State = #{tables := Tables}) -> %% Remote node asked us to skip our check to reduce %% resource usage and reduce a chance of a race condition. %% Start a new timeout for the next check. - schedule_check(State). + schedule_check(State); +handle_cancel_check(FromNode, RemoteTables, State = #{tables := Tables}) -> + %% A corner case when some nodes have different table lists, + %% in this case all nodes need to call join for all tables. + ?LOG_INFO(#{ + what => ignore_cancel_check, + text => <<"Table lists do not match, ignore cancel_check">>, + remote_tables => RemoteTables, + remote_node => FromNode, + tables => Tables + }), + State. -spec broadcast_to_other_nodes([node()], term(), state()) -> ok. broadcast_to_other_nodes([_ | _] = Nodes, Msg, #{name := Name}) -> diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 8c3988ca..d4a6adfa 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -2834,7 +2834,7 @@ cancel_check_is_sent_after_check(Config) -> #{nodes := [Node1, Node2]} = cets_discovery:system_info(Disco), %% Force check, so we do not wait for long. Disco ! check, - receive_message({cancel_check, Node2}). + receive_message({cancel_check, Node2, []}). cancel_check_resets_check_timeout(Config) -> DiscoName = disco_name(Config), @@ -2851,7 +2851,7 @@ cancel_check_resets_check_timeout(Config) -> %% Now next check would be not soon. %% So, we could be sure nobody would change timer_ref, interfering with our code. #{timer_ref := OldTRef} = cets_discovery:system_info(Disco), - Disco ! {cancel_check, Node1}, + Disco ! {cancel_check, Node1, []}, wait_for_different_timer_ref(Disco, OldTRef). %% Helper functions