From e1f70e7a64be8adffbe0921faf2770561fea6f56 Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 6 Jan 2025 10:44:40 -0500 Subject: [PATCH] fix(ffi): Disallow input MessagePack maps that contain non-string keys or array values that contain unsupported types. (#570) Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- .../core/src/clp/ffi/ir_stream/Serializer.cpp | 226 ++++++++++++------ .../core/src/clp/ffi/ir_stream/Serializer.hpp | 8 + .../core/tests/test-ir_encoding_methods.cpp | 97 ++++++++ 3 files changed, 255 insertions(+), 76 deletions(-) diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.cpp b/components/core/src/clp/ffi/ir_stream/Serializer.cpp index b29cf0492..295ab356c 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.cpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.cpp @@ -16,6 +16,7 @@ #include "../../ir/types.hpp" #include "../../time_types.hpp" +#include "../../TransactionManager.hpp" #include "../../type_utils.hpp" #include "../encoding_methods.hpp" #include "../SchemaTree.hpp" @@ -145,6 +146,16 @@ template vector& output_buf ) -> bool; +/** + * Checks whether the given msgpack array can be serialized into the key-value pair IR format. + * @param array + * @return true if the array is serializable. + * @return false if: + * - Any value inside the array has an unsupported type (i.e., `BIN` or `EXT`). + * - Any value inside the array has type `MAP` and the map has non-string keys. + */ +[[nodiscard]] auto is_msgpack_array_serializable(msgpack::object const& array) -> bool; + auto get_schema_tree_node_type_from_msgpack_val(msgpack::object const& val ) -> optional { optional ret_val; @@ -225,11 +236,56 @@ auto serialize_value_array( string& logtype_buf, vector& output_buf ) -> bool { + if (false == is_msgpack_array_serializable(val)) { + return false; + } std::ostringstream oss; oss << val; logtype_buf.clear(); return serialize_clp_string(oss.str(), logtype_buf, output_buf); } + +auto is_msgpack_array_serializable(msgpack::object const& array) -> bool { + vector validation_stack{&array}; + while (false == validation_stack.empty()) { + auto const* curr{validation_stack.back()}; + validation_stack.pop_back(); + if (msgpack::type::MAP == curr->type) { + // Validate map + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& as_map{curr->via.map}; + for (auto const& [key, value] : span{as_map.ptr, as_map.size}) { + if (msgpack::type::STR != key.type) { + return false; + } + if (msgpack::type::MAP == value.type || msgpack::type::ARRAY == value.type) { + validation_stack.push_back(&value); + } + } + continue; + } + + // Validate array + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& as_array{curr->via.array}; + for (auto const& obj : span{as_array.ptr, as_array.size}) { + switch (obj.type) { + case msgpack::type::BIN: + case msgpack::type::EXT: + // Unsupported types + return false; + case msgpack::type::ARRAY: + case msgpack::type::MAP: + validation_stack.push_back(&obj); + break; + default: + break; + } + } + } + + return true; +} } // namespace template @@ -282,86 +338,11 @@ auto Serializer::serialize_msgpack_map(msgpack::object_map c return true; } - m_schema_tree.take_snapshot(); m_schema_tree_node_buf.clear(); m_key_group_buf.clear(); m_value_group_buf.clear(); - // Traverse the map using DFS iteratively - bool failure{false}; - vector dfs_stack; - dfs_stack.emplace_back( - SchemaTree::cRootId, - span{msgpack_map.ptr, msgpack_map.size} - ); - while (false == dfs_stack.empty()) { - auto& curr{dfs_stack.back()}; - if (false == curr.has_next_child()) { - // Visited all children, so pop node - dfs_stack.pop_back(); - continue; - } - - // Convert the current value's type to its corresponding schema-tree node type - auto const& [key, val]{curr.get_next_child()}; - auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; - if (false == opt_schema_tree_node_type.has_value()) { - failure = true; - break; - } - auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; - - SchemaTree::NodeLocator const locator{ - curr.get_schema_tree_node_id(), - key.as(), - schema_tree_node_type - }; - - // Get the schema-tree node that corresponds with the current kv-pair, or add it if it - // doesn't exist. - auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; - if (false == opt_schema_tree_node_id.has_value()) { - opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); - if (false == serialize_schema_tree_node(locator)) { - failure = true; - break; - } - } - auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; - - if (msgpack::type::MAP == val.type) { - // Serialize map - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) - auto const& inner_map{val.via.map}; - auto const inner_map_size(inner_map.size); - if (0 == inner_map_size) { - // Value is an empty map, so we can serialize it immediately - if (false == serialize_key(schema_tree_node_id)) { - failure = true; - break; - } - serialize_value_empty_object(m_value_group_buf); - } else { - // Add map for DFS iteration - dfs_stack.emplace_back( - schema_tree_node_id, - span{inner_map.ptr, inner_map_size} - ); - } - } else { - // Serialize primitive - if (false - == (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type) - )) - { - failure = true; - break; - } - } - } - - if (failure) { - m_schema_tree.revert(); + if (false == serialize_msgpack_map_using_dfs(msgpack_map)) { return false; } @@ -485,6 +466,92 @@ auto Serializer::serialize_val( return true; } +template +auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool { + m_schema_tree.take_snapshot(); + TransactionManager revert_manager{ + []() noexcept -> void {}, + [&]() noexcept -> void { m_schema_tree.revert(); } + }; + + vector dfs_stack; + dfs_stack.emplace_back( + SchemaTree::cRootId, + span{msgpack_map.ptr, msgpack_map.size} + ); + while (false == dfs_stack.empty()) { + auto& curr{dfs_stack.back()}; + if (false == curr.has_next_child()) { + // Visited all children, so pop node + dfs_stack.pop_back(); + continue; + } + + // Convert the current value's type to its corresponding schema-tree node type + auto const& [key, val]{curr.get_next_child()}; + if (msgpack::type::STR != key.type) { + // A map containing non-string keys is not serializable + return false; + } + + auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; + if (false == opt_schema_tree_node_type.has_value()) { + return false; + } + auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; + + SchemaTree::NodeLocator const locator{ + curr.get_schema_tree_node_id(), + key.as(), + schema_tree_node_type + }; + + // Get the schema-tree node that corresponds with the current kv-pair, or add it if it + // doesn't exist. + auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; + if (false == opt_schema_tree_node_id.has_value()) { + opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); + if (false == serialize_schema_tree_node(locator)) { + return false; + } + } + auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; + + if (msgpack::type::MAP == val.type) { + // Serialize map + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& inner_map{val.via.map}; + auto const inner_map_size(inner_map.size); + if (0 != inner_map_size) { + // Add map for DFS iteration + dfs_stack.emplace_back( + schema_tree_node_id, + span{inner_map.ptr, inner_map_size} + ); + } else { + // Value is an empty map, so we can serialize it immediately + if (false == serialize_key(schema_tree_node_id)) { + return false; + } + serialize_value_empty_object(m_value_group_buf); + } + continue; + } + + // Serialize primitive + if (false + == (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type))) + { + return false; + } + } + + revert_manager.mark_success(); + return true; +} + // Explicitly declare template specializations so that we can define the template methods in this // file template auto Serializer::create( @@ -524,4 +591,11 @@ template auto Serializer::serialize_val( msgpack::object const& val, SchemaTree::Node::Type schema_tree_node_type ) -> bool; + +template auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool; +template auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool; } // namespace clp::ffi::ir_stream diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.hpp b/components/core/src/clp/ffi/ir_stream/Serializer.hpp index 14077ffba..edc6bf0cb 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.hpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.hpp @@ -116,6 +116,14 @@ class Serializer { [[nodiscard]] auto serialize_val(msgpack::object const& val, SchemaTree::Node::Type schema_tree_node_type) -> bool; + /** + * Serializes the given msgpack map using a depth-first search (DFS). + * @param msgpack_map + * @return Whether serialization succeeded. + */ + [[nodiscard]] auto serialize_msgpack_map_using_dfs(msgpack::object_map const& msgpack_map + ) -> bool; + UtcOffset m_curr_utc_offset{0}; Buffer m_ir_buf; SchemaTree m_schema_tree; diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index 347dadb7a..578ae49f7 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -1,8 +1,11 @@ #include #include +#include #include +#include #include #include +#include #include #include #include @@ -208,6 +211,21 @@ template */ [[nodiscard]] auto count_num_leaves(nlohmann::json const& root) -> size_t; +/** + * Unpacks the given bytes into a msgpack object and asserts that serializing it into the KV-pair IR + * format fails. + * @tparam encoded_variable_t + * @param buffer A buffer containing a msgpack byte sequence that cannot be serialized into the + * KV-pair IR format. + * @param serializer + * @return Whether serialization failed and the underlying IR buffer remains empty. + */ +template +[[nodiscard]] auto unpack_and_assert_serialization_failure( + std::stringstream& buffer, + Serializer& serializer +) -> bool; + template [[nodiscard]] auto serialize_log_events( vector const& log_events, @@ -357,6 +375,29 @@ auto count_num_leaves(nlohmann::json const& root) -> size_t { return num_leaves; } + +template +auto unpack_and_assert_serialization_failure( + std::stringstream& buffer, + Serializer& serializer +) -> bool { + REQUIRE(serializer.get_ir_buf_view().empty()); + string msgpack_bytes{buffer.str()}; + buffer.str({}); + buffer.clear(); + auto const msgpack_obj_handle{msgpack::unpack(msgpack_bytes.data(), msgpack_bytes.size())}; + auto const msgpack_obj{msgpack_obj_handle.get()}; + REQUIRE((msgpack::type::MAP == msgpack_obj.type)); + if (serializer.serialize_msgpack_map(msgpack_obj.via.map)) { + // Serialization should fail + return false; + } + if (false == serializer.get_ir_buf_view().empty()) { + // Serialization buffer should be empty + return false; + } + return true; +} } // namespace /** @@ -1332,3 +1373,59 @@ TEMPLATE_TEST_CASE( output_buf )); } + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEMPLATE_TEST_CASE( + "ffi_ir_stream_Serializer_serialize_invalid_msgpack", + "[clp][ffi][ir_stream][Serializer]", + four_byte_encoded_variable_t, + eight_byte_encoded_variable_t +) { + auto result{Serializer::create()}; + REQUIRE((false == result.has_error())); + + std::stringstream msgpack_serialization_buffer; + auto& serializer{result.value()}; + serializer.clear_ir_buf(); + + auto assert_invalid_serialization = [&](T invalid_value) -> bool { + std::map const invalid_map{{"valid_key", invalid_value}}; + msgpack::pack(msgpack_serialization_buffer, invalid_map); + return unpack_and_assert_serialization_failure(msgpack_serialization_buffer, serializer); + }; + + std::map const map_with_integer_keys{{0, 0}, {1, 1}, {2, 2}}; + REQUIRE(assert_invalid_serialization(map_with_integer_keys)); + + std::map const map_with_invalid_submap{ + {"valid_key", map_with_integer_keys} + }; + REQUIRE(assert_invalid_serialization(map_with_invalid_submap)); + + std::tuple> const array_with_invalid_type{0, {0x00, 0x00, 0x00}}; + REQUIRE(assert_invalid_serialization(array_with_invalid_type)); + + std::tuple const subarray_with_invalid_type{ + 0, + array_with_invalid_type + }; + REQUIRE(assert_invalid_serialization(subarray_with_invalid_type)); + + std::tuple const array_with_invalid_map{ + 0, + map_with_integer_keys + }; + REQUIRE(assert_invalid_serialization(array_with_invalid_map)); + + std::tuple const subarray_with_invalid_map{ + 0, + array_with_invalid_map + }; + REQUIRE(assert_invalid_serialization(subarray_with_invalid_map)); + + std::tuple const array_with_invalid_submap{ + 0, + map_with_invalid_submap + }; + REQUIRE(assert_invalid_serialization(array_with_invalid_submap)); +}