From 6794afa4fbf7ff4c772ecd13f1c0f1490cd0f0b4 Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Wed, 2 Aug 2023 03:22:26 +0000 Subject: [PATCH] cleanup(outputs): ensure old defaults in queue_capacity_outputs in new config Co-authored-by: Leonardo Grasso Signed-off-by: Melissa Kilby --- falco.yaml | 39 +++++++++++++++++------------ userspace/falco/configuration.cpp | 4 +-- userspace/falco/configuration_aux.h | 2 +- userspace/falco/falco_outputs.cpp | 12 ++++++--- userspace/falco/stats_writer.cpp | 5 +++- 5 files changed, 38 insertions(+), 24 deletions(-) diff --git a/falco.yaml b/falco.yaml index a043306ceeb..8d6a3a8f1a8 100644 --- a/falco.yaml +++ b/falco.yaml @@ -293,24 +293,31 @@ outputs: # [Experimental] `queue_capacity_outputs` # -# Falco utilizes tbb::concurrent_bounded_queue for the outputs, and this parameter -# allows you to customize the capacity. Refer to the official documentation: +# Falco utilizes tbb::concurrent_bounded_queue for handling outputs, and this parameter +# allows you to customize the queue capacity. Please refer to the official documentation: # https://oneapi-src.github.io/oneTBB/main/tbb_userguide/Concurrent_Queue_Classes.html. -# On a healthy system with tuned Falco rules, the queue should not fill up. -# If it does, it most likely happens if the entire event flow is too slow. This -# could indicate that the server is under heavy load. -# -# Lowering the number of items can prevent steadily increasing memory until the OOM -# killer stops the Falco process. We expose recovery actions to self-limit or self -# OOM kill earlier similar to how we expose the kernel buffer size as parameter. -# However, it will not address the root cause of the event pipe not holding up. +# On a healthy system with optimized Falco rules, the queue should not fill up. +# If it does, it is most likely happening due to the entire event flow being too slow, +# indicating that the server is under heavy load. 
+# +# Lowering the number of items can prevent memory from steadily increasing until the OOM +# killer stops the Falco process. We provide recovery actions to self-limit or self-kill +# in order to handle this situation earlier, similar to how we expose the kernel buffer size +# as a parameter. +# However, it will not address the root cause of the event pipe not keeping up. +# +# `items`: the maximum number of items allowed in the queue. The default is 0, which means that +# the queue is unbounded. +# You can experiment with values greater or smaller than the anchor value 1000000. +# +# `recovery`: the strategy to follow when the queue fills up. This also applies when +# the queue is unbounded and all available memory on the system is consumed. +# recovery: 0 means continue. +# recovery: 1 means simply exit (default behavior). +# recovery: 2 means empty the queue and then continue. queue_capacity_outputs: - # number of max items in queue - items: 1000000 - # continue: 0 (default) - # exit: 1 - # empty queue then continue: 2 - recovery: 0 + items: 0 + recovery: 1 ########################## diff --git a/userspace/falco/configuration.cpp b/userspace/falco/configuration.cpp index 05273cdd8b5..64c42eb4717 100644 --- a/userspace/falco/configuration.cpp +++ b/userspace/falco/configuration.cpp @@ -41,7 +41,7 @@ falco_configuration::falco_configuration(): m_watch_config_files(true), m_buffered_outputs(false), m_queue_capacity_outputs_items(DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS), - m_queue_capacity_outputs_recovery(RECOVERY_DROP_CURRENT), + m_queue_capacity_outputs_recovery(RECOVERY_EXIT), m_time_format_iso_8601(false), m_output_timeout(2000), m_grpc_enabled(false), @@ -255,7 +255,7 @@ void falco_configuration::load_yaml(const std::string& config_name, const yaml_h m_buffered_outputs = config.get_scalar("buffered_outputs", false); m_queue_capacity_outputs_items = config.get_scalar("queue_capacity_outputs.items", DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS); -
m_queue_capacity_outputs_recovery = config.get_scalar("queue_capacity_outputs.recovery", RECOVERY_DROP_CURRENT); + m_queue_capacity_outputs_recovery = config.get_scalar("queue_capacity_outputs.recovery", RECOVERY_EXIT); m_time_format_iso_8601 = config.get_scalar("time_format_iso_8601", false); falco_logger::log_stderr = config.get_scalar("log_stderr", false); diff --git a/userspace/falco/configuration_aux.h b/userspace/falco/configuration_aux.h index eb9344a6d31..31504e30041 100644 --- a/userspace/falco/configuration_aux.h +++ b/userspace/falco/configuration_aux.h @@ -13,7 +13,7 @@ limitations under the License. #pragma once -#define DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS 1000000UL +#define DEFAULT_ITEMS_QUEUE_CAPAXITY_OUTPUTS 0 enum outputs_recovery_code { RECOVERY_DROP_CURRENT = 0, /* queue_capacity_outputs recovery strategy of continuing on. */ diff --git a/userspace/falco/falco_outputs.cpp b/userspace/falco/falco_outputs.cpp index de1610b35e9..889f1b65ed7 100644 --- a/userspace/falco/falco_outputs.cpp +++ b/userspace/falco/falco_outputs.cpp @@ -68,7 +68,11 @@ falco_outputs::falco_outputs( } m_worker_thread = std::thread(&falco_outputs::worker, this); - m_queue.set_capacity(queue_capacity_outputs_items); + if (queue_capacity_outputs_items > 0) + { + m_queue.set_capacity(queue_capacity_outputs_items); + } + m_recovery = queue_capacity_outputs_recovery; } @@ -275,13 +279,13 @@ inline void falco_outputs::push(const ctrl_msg& cmsg) switch (m_recovery) { case RECOVERY_EXIT: - fprintf(stderr, "Fatal error: Output queue reached maximum capacity. Exiting ... \n"); + fprintf(stderr, "Fatal error: Output queue out of memory. Exiting ... \n"); exit(EXIT_FAILURE); case RECOVERY_EMPTY: - fprintf(stderr, "Output queue reached maximum capacity. Empty queue and continue ... \n"); + fprintf(stderr, "Output queue out of memory. Empty queue and continue ... \n"); m_queue.empty(); default: - fprintf(stderr, "Output queue reached maximum capacity. Continue on ... 
\n"); + fprintf(stderr, "Output queue out of memory. Continue on ... \n"); break; } } diff --git a/userspace/falco/stats_writer.cpp b/userspace/falco/stats_writer.cpp index 4e8648dd99e..673b62c71de 100644 --- a/userspace/falco/stats_writer.cpp +++ b/userspace/falco/stats_writer.cpp @@ -86,7 +86,10 @@ stats_writer::stats_writer( m_config = config; // capacity and controls should not be relevant for stats outputs, adopt capacity // for completeness, but do not implement config recovery strategies. - m_queue.set_capacity(config->m_queue_capacity_outputs_items); + if (config->m_queue_capacity_outputs_items > 0) + { + m_queue.set_capacity(config->m_queue_capacity_outputs_items); + } if (config->m_metrics_enabled) { if (!config->m_metrics_output_file.empty())