From d060bc60ad047c2768e83ad23fdb02b6958207d7 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 11 Nov 2024 10:58:39 -0500 Subject: [PATCH 001/144] Temp fix for unit-test until future PR where Tag ptrs are stored in vector instead of set. --- tests/test-lexer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp index e369020f..09b8bb0c 100644 --- a/tests/test-lexer.cpp +++ b/tests/test-lexer.cpp @@ -152,7 +152,7 @@ TEST_CASE("Test the Schema class", "[Schema]") { "?\\d+" ")C" ")", - U"(Z<~letter2><~containerID><~letter1><~letter>)|(" + U"(Z<~containerID><~letter><~letter1><~letter2>)|(" "A(" "(((a)|(b))<~letter2>)|" "(((c)|(d))<~letter1>)" @@ -196,7 +196,7 @@ TEST_CASE("Test the Schema class", "[Schema]") { "){0,10}" ")", U"(" - U"(<~letterA><~letterB>)|((" + U"(<~letterB><~letterA>)|((" U"((a)<~letterB>)|" U"((b)<~letterA>)" U"){1,inf})" @@ -205,7 +205,7 @@ TEST_CASE("Test the Schema class", "[Schema]") { U"((c)<~letterD>)|" U"((d)<~letterC>)" U"){1,10})" - U"<~letterA><~letterB>)" + U"<~letterB><~letterA>)" // clang-format on ); } From f041a373392ac6f2f7233c104560e43f88eb97bd Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 11 Nov 2024 11:17:04 -0500 Subject: [PATCH 002/144] Swap from set to vector to tag pointers to ensure determinism. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 18 +++++++++++------- src/log_surgeon/finite_automata/RegexNFA.hpp | 4 ++-- .../finite_automata/RegexNFAState.hpp | 2 +- .../finite_automata/TaggedTransition.hpp | 4 ++-- tests/test-NFA.cpp | 2 +- tests/test-lexer.cpp | 10 +++++----- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index cebae88c..0a4431cf 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -82,19 +82,23 @@ class RegexAST { */ [[nodiscard]] virtual auto serialize() const -> std::u32string = 0; - [[nodiscard]] auto get_subtree_positive_tags() const -> std::set const& { + [[nodiscard]] auto get_subtree_positive_tags() const -> std::vector const& { return m_subtree_positive_tags; } - auto set_subtree_positive_tags(std::set subtree_positive_tags) -> void { + auto set_subtree_positive_tags(std::vector subtree_positive_tags) -> void { m_subtree_positive_tags = std::move(subtree_positive_tags); } - auto add_subtree_positive_tags(std::set subtree_positive_tags) -> void { - m_subtree_positive_tags.merge(subtree_positive_tags); + auto add_subtree_positive_tags(std::vector subtree_positive_tags) -> void { + m_subtree_positive_tags.insert( + m_subtree_positive_tags.end(), + subtree_positive_tags.begin(), + subtree_positive_tags.end() + ); } - auto set_negative_tags(std::set negative_tags) -> void { + auto set_negative_tags(std::vector negative_tags) -> void { m_negative_tags = std::move(negative_tags); } @@ -141,8 +145,8 @@ class RegexAST { } private: - std::set m_subtree_positive_tags; - std::set m_negative_tags; + std::vector m_subtree_positive_tags; + std::vector m_negative_tags; }; /** diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index edbbf43a..7919a0c6 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ 
b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -54,7 +54,7 @@ class RegexNFA { * @return NFAStateType* */ [[nodiscard]] auto new_state_with_negative_tagged_transition( - std::set tags, + std::vector tags, NFAStateType const* dest_state ) -> NFAStateType*; @@ -111,7 +111,7 @@ auto RegexNFA::new_state_with_positive_tagged_transition( template auto RegexNFA::new_state_with_negative_tagged_transition( - std::set tags, + std::vector tags, NFAStateType const* dest_state ) -> NFAStateType* { m_states.emplace_back(std::make_unique(std::move(tags), dest_state)); diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index f6d18d3c..dd21557b 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -34,7 +34,7 @@ class RegexNFAState { RegexNFAState(Tag const* tag, RegexNFAState const* dest_state) : m_positive_tagged_transitions{{tag, dest_state}} {} - RegexNFAState(std::set tags, RegexNFAState const* dest_state) + RegexNFAState(std::vector tags, RegexNFAState const* dest_state) : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {} auto set_accepting(bool accepting) -> void { m_accepting = accepting; } diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 3efe7bc6..2c238275 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -44,7 +44,7 @@ class PositiveTaggedTransition { template class NegativeTaggedTransition { public: - NegativeTaggedTransition(std::set tags, NFAStateType const* dest_state) + NegativeTaggedTransition(std::vector tags, NFAStateType const* dest_state) : m_tags{std::move(tags)}, m_dest_state{dest_state} {} @@ -71,7 +71,7 @@ class NegativeTaggedTransition { } private: - std::set const m_tags; + std::vector const m_tags; NFAStateType const* m_dest_state; 
}; } // namespace log_surgeon::finite_automata diff --git a/tests/test-NFA.cpp b/tests/test-NFA.cpp index 0c379028..c7a599b2 100644 --- a/tests/test-NFA.cpp +++ b/tests/test-NFA.cpp @@ -59,7 +59,7 @@ TEST_CASE("Test NFA", "[NFA]") { += "2:byte_transitions={}," "epsilon_transitions={}," "positive_tagged_transitions={}," - "negative_tagged_transition={5[containerID,letter1,letter2,letter]}\n"; + "negative_tagged_transition={5[letter1,letter2,letter,containerID]}\n"; expected_serialized_nfa += "3:byte_transitions={}," "epsilon_transitions={}," "positive_tagged_transitions={6[letter1]}," diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp index 09b8bb0c..8906be8d 100644 --- a/tests/test-lexer.cpp +++ b/tests/test-lexer.cpp @@ -152,7 +152,7 @@ TEST_CASE("Test the Schema class", "[Schema]") { "?\\d+" ")C" ")", - U"(Z<~containerID><~letter><~letter1><~letter2>)|(" + U"(Z<~letter1><~letter2><~letter><~containerID>)|(" "A(" "(((a)|(b))<~letter2>)|" "(((c)|(d))<~letter1>)" @@ -196,16 +196,16 @@ TEST_CASE("Test the Schema class", "[Schema]") { "){0,10}" ")", U"(" - U"(<~letterB><~letterA>)|((" + U"(<~letterA><~letterB>)|((" U"((a)<~letterB>)|" U"((b)<~letterA>)" U"){1,inf})" - U"<~letterD><~letterC>)|(" - U"(<~letterD><~letterC>)|((" + U"<~letterC><~letterD>)|(" + U"(<~letterC><~letterD>)|((" U"((c)<~letterD>)|" U"((d)<~letterC>)" U"){1,10})" - U"<~letterB><~letterA>)" + U"<~letterA><~letterB>)" // clang-format on ); } From f72e1205cce4204926edb1b72bff5f273f69b005 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 11 Nov 2024 11:20:56 -0500 Subject: [PATCH 003/144] Better test coverage for tag class. 
--- tests/test-tag.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index fdfff4c1..7c5394fc 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -4,7 +4,19 @@ using log_surgeon::finite_automata::Tag; -TEST_CASE("Test Tag class", "[Tag]") { - Tag const tag("uID"); - REQUIRE("uID" == tag.get_name()); +TEST_CASE("Tag operations", "[Tag]") { + SECTION("Basic name retrieval works correctly") { + Tag const tag("uID"); + REQUIRE("uID" == tag.get_name()); + } + + SECTION("Empty tag name is handled correctly") { + Tag const empty_tag(""); + REQUIRE(empty_tag.get_name().empty()); + } + + SECTION("Special characters in tag names are preserved") { + Tag const special_tag("user.id-123_@"); + REQUIRE("user.id-123_@" == special_tag.get_name()); + } } From d5ac1adac2bb80b11dbb5ecd7c0ec90a55e8ae3c Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Tue, 12 Nov 2024 08:53:36 -0500 Subject: [PATCH 004/144] Use constant iterators for elements that should not change. Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/RegexAST.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 0a4431cf..906c4edf 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -93,8 +93,8 @@ class RegexAST { auto add_subtree_positive_tags(std::vector subtree_positive_tags) -> void { m_subtree_positive_tags.insert( m_subtree_positive_tags.end(), - subtree_positive_tags.begin(), - subtree_positive_tags.end() + subtree_positive_tags.cbegin(), + subtree_positive_tags.cend() ); } From 30f03ede7b13846fe57eed23c4233753005b7b6d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 12 Nov 2024 08:54:37 -0500 Subject: [PATCH 005/144] Use braced intiailization in test-tag.cpp. 
--- tests/test-tag.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index 7c5394fc..05a812cd 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -6,17 +6,17 @@ using log_surgeon::finite_automata::Tag; TEST_CASE("Tag operations", "[Tag]") { SECTION("Basic name retrieval works correctly") { - Tag const tag("uID"); + Tag const tag{"uID"}; REQUIRE("uID" == tag.get_name()); } SECTION("Empty tag name is handled correctly") { - Tag const empty_tag(""); + Tag const empty_tag{""}; REQUIRE(empty_tag.get_name().empty()); } SECTION("Special characters in tag names are preserved") { - Tag const special_tag("user.id-123_@"); + Tag const special_tag{"user.id-123_@"}; REQUIRE("user.id-123_@" == special_tag.get_name()); } } From d386fc053c8fad1142be343bb298d105aa4d0806 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Tue, 12 Nov 2024 08:58:50 -0500 Subject: [PATCH 006/144] Use const& for insertion function that can't use move semantics. Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 906c4edf..0ee544eb 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -90,7 +90,7 @@ class RegexAST { m_subtree_positive_tags = std::move(subtree_positive_tags); } - auto add_subtree_positive_tags(std::vector subtree_positive_tags) -> void { + auto add_subtree_positive_tags(std::vector const& subtree_positive_tags) -> void { m_subtree_positive_tags.insert( m_subtree_positive_tags.end(), subtree_positive_tags.cbegin(), From 4024c3eec05f60782db9ac9085604d75a9c6565a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 14:23:46 -0500 Subject: [PATCH 007/144] Have get_name() return string_view; Update headers. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 3 ++- src/log_surgeon/finite_automata/Tag.hpp | 3 ++- tests/test-lexer.cpp | 4 +--- tests/test-tag.cpp | 7 +++++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 0ee544eb..dd390cc9 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -699,7 +700,7 @@ class RegexASTCapture : public RegexAST { [[nodiscard]] auto serialize() const -> std::u32string override; - [[nodiscard]] auto get_group_name() const -> std::string const& { return m_tag->get_name(); } + [[nodiscard]] auto get_group_name() const -> std::string_view { return m_tag->get_name(); } [[nodiscard]] auto get_group_regex_ast( ) const -> std::unique_ptr> const& { diff --git a/src/log_surgeon/finite_automata/Tag.hpp b/src/log_surgeon/finite_automata/Tag.hpp index 36e7c3d1..5a30071e 100644 --- a/src/log_surgeon/finite_automata/Tag.hpp +++ b/src/log_surgeon/finite_automata/Tag.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -19,7 +20,7 @@ class Tag { public: explicit Tag(std::string name) : m_name{std::move(name)} {} - [[nodiscard]] auto get_name() const -> std::string const& { return m_name; } + [[nodiscard]] auto get_name() const -> std::string_view { return m_name; } private: std::string const m_name; diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp index 8906be8d..6c0ee042 100644 --- a/tests/test-lexer.cpp +++ b/tests/test-lexer.cpp @@ -1,7 +1,5 @@ #include -#include #include -#include #include #include #include @@ -118,7 +116,7 @@ TEST_CASE("Test the Schema class", "[Schema]") { auto* regex_ast_capture = dynamic_cast(regex_ast_cat_ptr->get_right()); REQUIRE(nullptr != regex_ast_capture); - REQUIRE("uID" == regex_ast_capture->get_group_name()); + REQUIRE("uID" == 
string{regex_ast_capture->get_group_name()}); auto* regex_ast_multiplication_ast = dynamic_cast( regex_ast_capture->get_group_regex_ast().get() diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index 05a812cd..fa7f6b9f 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -2,12 +2,15 @@ #include +#include + using log_surgeon::finite_automata::Tag; +using std::string; TEST_CASE("Tag operations", "[Tag]") { SECTION("Basic name retrieval works correctly") { Tag const tag{"uID"}; - REQUIRE("uID" == tag.get_name()); + REQUIRE("uID" == string{tag.get_name()}); } SECTION("Empty tag name is handled correctly") { @@ -17,6 +20,6 @@ TEST_CASE("Tag operations", "[Tag]") { SECTION("Special characters in tag names are preserved") { Tag const special_tag{"user.id-123_@"}; - REQUIRE("user.id-123_@" == special_tag.get_name()); + REQUIRE("user.id-123_@" == string{special_tag.get_name()}); } } From 22c3b8200a2fd8722e2fb68ea54b4cf43ef4351f Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Wed, 13 Nov 2024 14:25:06 -0500 Subject: [PATCH 008/144] Remove const from member variable. Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/Tag.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/Tag.hpp b/src/log_surgeon/finite_automata/Tag.hpp index 5a30071e..f552ecc6 100644 --- a/src/log_surgeon/finite_automata/Tag.hpp +++ b/src/log_surgeon/finite_automata/Tag.hpp @@ -23,7 +23,7 @@ class Tag { [[nodiscard]] auto get_name() const -> std::string_view { return m_name; } private: - std::string const m_name; + std::string m_name; std::vector m_starts; std::vector m_ends; }; From ed5553431ddc7cfe8f0eeb0863c384f21865738e Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Wed, 13 Nov 2024 14:25:18 -0500 Subject: [PATCH 009/144] Remove const from member variable. 
Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/TaggedTransition.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 2c238275..effd88c6 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -71,7 +71,7 @@ class NegativeTaggedTransition { } private: - std::vector const m_tags; + std::vector m_tags; NFAStateType const* m_dest_state; }; } // namespace log_surgeon::finite_automata From 534afce1f12491dbc22c5a0fe949028cf9170178 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 14:26:58 -0500 Subject: [PATCH 010/144] Run linter. --- tests/test-tag.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index fa7f6b9f..28d4fedd 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -1,9 +1,9 @@ +#include + #include #include -#include - using log_surgeon::finite_automata::Tag; using std::string; From 61fdb5dabdca7d185006a49feae2ffcde5a66914 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 14:42:30 -0500 Subject: [PATCH 011/144] Add move semantic test cases. 
--- tests/test-tag.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index 28d4fedd..a8b35e99 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -22,4 +22,14 @@ TEST_CASE("Tag operations", "[Tag]") { Tag const special_tag{"user.id-123_@"}; REQUIRE("user.id-123_@" == string{special_tag.get_name()}); } + + SECTION("Move semantics work correctly") { + Tag original_tag{"source"}; + Tag moved_tag{std::move(original_tag)}; + REQUIRE("source" == string{moved_tag.get_name()}); + + Tag assign_tag{"target"}; + assign_tag = Tag{"new_source"}; + REQUIRE("new_source" == string{assign_tag.get_name()}); + } } From 78e5fe8ef050d534374aff688e75af7ef2dd5375 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 14:54:58 -0500 Subject: [PATCH 012/144] Add PositiveTaggedTransition docstring and make m_tag throw if ever null. --- src/log_surgeon/finite_automata/TaggedTransition.hpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index effd88c6..571c9425 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -13,11 +13,18 @@ #include namespace log_surgeon::finite_automata { + +/** + * Represents an NFA transition indicating a capture group has been matched. + * `m_tag` is always expected to be non-null. + * @throw std::invalid_argument Thrown when a null tag is passed into the constructor. + * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). + */ template class PositiveTaggedTransition { public: PositiveTaggedTransition(Tag const* tag, NFAStateType const* dest_state) - : m_tag{tag}, + : m_tag{nullptr == tag ? 
throw std::invalid_argument("tag cannot be null") : tag}, m_dest_state{dest_state} {} [[nodiscard]] auto get_dest_state() const -> NFAStateType const* { return m_dest_state; } @@ -30,7 +37,7 @@ class PositiveTaggedTransition { [[nodiscard]] auto serialize(std::unordered_map const& state_ids ) const -> std::optional { auto const state_id_it = state_ids.find(m_dest_state); - if (state_id_it == state_ids.end() || nullptr == m_tag) { + if (state_id_it == state_ids.end()) { return std::nullopt; } return fmt::format("{}[{}]", state_id_it->second, m_tag->get_name()); From 630d882b025f5e5234e232eeaafeb1f780956103 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 15:14:24 -0500 Subject: [PATCH 013/144] Delete unused operators. --- src/log_surgeon/finite_automata/RegexAST.hpp | 22 +++----------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index dd390cc9..6be339b9 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -123,9 +123,9 @@ class RegexAST { protected: RegexAST(RegexAST const& rhs) = default; - auto operator=(RegexAST const& rhs) -> RegexAST& = default; - RegexAST(RegexAST&& rhs) noexcept = default; - auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = default; + auto operator=(RegexAST const& rhs) -> RegexAST& = delete; + RegexAST(RegexAST&& rhs) noexcept = delete; + auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = delete; [[nodiscard]] auto serialize_negative_tags() const -> std::u32string { if (m_negative_tags.empty()) { @@ -439,10 +439,6 @@ class RegexASTOr : public RegexAST { m_left(std::unique_ptr>(rhs.m_left->clone())), m_right(std::unique_ptr>(rhs.m_right->clone())) {} - auto operator=(RegexASTOr const& rhs) -> RegexASTOr& = default; - RegexASTOr(RegexASTOr&& rhs) noexcept = default; - auto operator=(RegexASTOr&& rhs) noexcept -> RegexASTOr& = default; - /** * 
Used for cloning a unique_pointer of type RegexASTOr * @return RegexASTOr* @@ -506,10 +502,6 @@ class RegexASTCat : public RegexAST { m_left(std::unique_ptr>(rhs.m_left->clone())), m_right(std::unique_ptr>(rhs.m_right->clone())) {} - auto operator=(RegexASTCat const& rhs) -> RegexASTCat& = default; - RegexASTCat(RegexASTCat&& rhs) noexcept = default; - auto operator=(RegexASTCat&& rhs) noexcept -> RegexASTCat& = default; - /** * Used for cloning a unique_pointer of type RegexASTCat * @return RegexASTCat* @@ -575,10 +567,6 @@ class RegexASTMultiplication : public RegexAST { m_min(rhs.m_min), m_max(rhs.m_max) {} - auto operator=(RegexASTMultiplication const& rhs) -> RegexASTMultiplication& = default; - RegexASTMultiplication(RegexASTMultiplication&& rhs) noexcept = default; - auto operator=(RegexASTMultiplication&& rhs) noexcept -> RegexASTMultiplication& = default; - /** * Used for cloning a unique_pointer of type RegexASTMultiplication * @return RegexASTMultiplication* @@ -659,10 +647,6 @@ class RegexASTCapture : public RegexAST { RegexAST::set_subtree_positive_tags(rhs.get_subtree_positive_tags()); } - auto operator=(RegexASTCapture const& rhs) -> RegexASTCapture& = default; - RegexASTCapture(RegexASTCapture&& rhs) noexcept = default; - auto operator=(RegexASTCapture&& rhs) noexcept -> RegexASTCapture& = default; - /** * Used for cloning a `unique_pointer` of type `RegexASTCapture`. * @return RegexASTCapture* From 543f8af0cfcbaef4905fe4d0b7517a486a864405 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 15:24:47 -0500 Subject: [PATCH 014/144] Move null check into intiailizer list for NegativeTaggedTransition constructor; Add docstring to NegativeTaggedTransition. 
--- .../finite_automata/TaggedTransition.hpp | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 571c9425..30d48942 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -15,9 +15,9 @@ namespace log_surgeon::finite_automata { /** - * Represents an NFA transition indicating a capture group has been matched. + * Represents an NFA transition indicating that a capture group has been matched. * `m_tag` is always expected to be non-null. - * @throw std::invalid_argument Thrown when a null tag is passed into the constructor. + * @throw std::invalid_argument Thrown if a null tag is passed into the constructor. * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). */ template @@ -48,11 +48,22 @@ class PositiveTaggedTransition { NFAStateType const* m_dest_state; }; +/** + * Represents an NFA transition indicating that a capture group has been unmatched. + * All tags in `m_tags` are always expected to be non-null. + * @throw std::invalid_argument Thrown if any tag passed into the constructor is null. + * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). 
+ */ template class NegativeTaggedTransition { public: NegativeTaggedTransition(std::vector tags, NFAStateType const* dest_state) - : m_tags{std::move(tags)}, + : m_tags{[&tags] { + if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) { + throw std::invalid_argument("tags cannot contain null elements"); + } + return std::move(tags); + }()}, m_dest_state{dest_state} {} [[nodiscard]] auto get_dest_state() const -> NFAStateType const* { return m_dest_state; } @@ -69,9 +80,6 @@ class NegativeTaggedTransition { return std::nullopt; } - if (std::ranges::any_of(m_tags, [](Tag const* tag) { return tag == nullptr; })) { - return std::nullopt; - } auto const tag_names = m_tags | std::ranges::views::transform(&Tag::get_name); return fmt::format("{}[{}]", state_id_it->second, fmt::join(tag_names, ",")); From ec342fc34036ef783739010ca562e5b10a9c6b43 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 16:12:15 -0500 Subject: [PATCH 015/144] Remove position vectors from Tag, as they arent used in the AST. --- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- src/log_surgeon/finite_automata/Tag.hpp | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 6be339b9..3fd2b104 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -643,7 +643,7 @@ class RegexASTCapture : public RegexAST { m_group_regex_ast{ std::unique_ptr>(rhs.m_group_regex_ast->clone()) }, - m_tag{rhs.m_tag ? 
std::make_unique(*rhs.m_tag) : nullptr} { + m_tag{std::make_unique(*rhs.m_tag)} { RegexAST::set_subtree_positive_tags(rhs.get_subtree_positive_tags()); } diff --git a/src/log_surgeon/finite_automata/Tag.hpp b/src/log_surgeon/finite_automata/Tag.hpp index f552ecc6..3a3b4d7f 100644 --- a/src/log_surgeon/finite_automata/Tag.hpp +++ b/src/log_surgeon/finite_automata/Tag.hpp @@ -1,21 +1,11 @@ #ifndef LOG_SURGEON_FINITE_AUTOMATA_TAG #define LOG_SURGEON_FINITE_AUTOMATA_TAG -#include #include #include #include -#include namespace log_surgeon::finite_automata { -/** - * This class represents a tag that is associated with matches of a capture group. If `m_starts` is - * empty, it indicates that the capture group was unmatched. - * - * Since capture group regex can be contained within repetition regex, - * (e.g., "((user_id=(?\d+),)+"), `m_starts` and `m_ends` are vectors that track the locations - * of each occurrence of the capture group. - */ class Tag { public: explicit Tag(std::string name) : m_name{std::move(name)} {} @@ -24,8 +14,6 @@ class Tag { private: std::string m_name; - std::vector m_starts; - std::vector m_ends; }; } // namespace log_surgeon::finite_automata From af86281948843e8f0ae8389e87d07e3fe5b77bda Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 17:14:19 -0500 Subject: [PATCH 016/144] RegexASTCapture enforces non-null arguments; Add docstring to RegexASTCapture; Use cbegin() and cend(). --- src/log_surgeon/finite_automata/RegexAST.hpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 3fd2b104..d78a8e76 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -621,6 +621,14 @@ class RegexASTMultiplication : public RegexAST { uint32_t m_max; }; +/** + * Represents a capture group AST node. + * `m_tag` is always expected to be non-null. 
+ * `m_group_regex_ast` is always expected to be non-null. + * @throw std::invalid_argument Thrown if a null tag or group regex AST is passed into the + * constructor. + * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). + */ template class RegexASTCapture : public RegexAST { public: @@ -630,8 +638,9 @@ class RegexASTCapture : public RegexAST { std::unique_ptr> group_regex_ast, std::unique_ptr tag ) - : m_group_regex_ast{std::move(group_regex_ast)}, - m_tag{std::move(tag)} { + : m_group_regex_ast{nullptr == group_regex_ast ? throw std::invalid_argument("group regex AST cannot be null") : std::move(group_regex_ast)}, + m_tag{nullptr == tag ? throw std::invalid_argument("tag cannot be null") + : std::move(tag)} { RegexAST::set_subtree_positive_tags( m_group_regex_ast->get_subtree_positive_tags() ); @@ -884,10 +893,10 @@ void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAS template [[nodiscard]] auto RegexASTCapture::serialize() const -> std::u32string { - auto const tag_name_u32 = std::u32string(m_tag->get_name().begin(), m_tag->get_name().end()); + auto const tag_name_u32 = std::u32string(m_tag->get_name().cbegin(), m_tag->get_name().cend()); return fmt::format( U"({})<{}>{}", - nullptr != m_group_regex_ast ? m_group_regex_ast->serialize() : U"null", + m_group_regex_ast->serialize(), tag_name_u32, RegexAST::serialize_negative_tags() ); From 738becd7481dea631c246a4b67ecc237db976c44 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 17:17:48 -0500 Subject: [PATCH 017/144] Capitalize exceptions. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 4 ++-- src/log_surgeon/finite_automata/TaggedTransition.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index d78a8e76..d37067ec 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -638,8 +638,8 @@ class RegexASTCapture : public RegexAST { std::unique_ptr> group_regex_ast, std::unique_ptr tag ) - : m_group_regex_ast{nullptr == group_regex_ast ? throw std::invalid_argument("group regex AST cannot be null") : std::move(group_regex_ast)}, - m_tag{nullptr == tag ? throw std::invalid_argument("tag cannot be null") + : m_group_regex_ast{nullptr == group_regex_ast ? throw std::invalid_argument("Group regex AST cannot be null") : std::move(group_regex_ast)}, + m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null") : std::move(tag)} { RegexAST::set_subtree_positive_tags( m_group_regex_ast->get_subtree_positive_tags() diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 30d48942..309fc97d 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -24,7 +24,7 @@ template class PositiveTaggedTransition { public: PositiveTaggedTransition(Tag const* tag, NFAStateType const* dest_state) - : m_tag{nullptr == tag ? throw std::invalid_argument("tag cannot be null") : tag}, + : m_tag{nullptr == tag ? 
throw std::invalid_argument("Tag cannot be null") : tag}, m_dest_state{dest_state} {} [[nodiscard]] auto get_dest_state() const -> NFAStateType const* { return m_dest_state; } @@ -60,7 +60,7 @@ class NegativeTaggedTransition { NegativeTaggedTransition(std::vector tags, NFAStateType const* dest_state) : m_tags{[&tags] { if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) { - throw std::invalid_argument("tags cannot contain null elements"); + throw std::invalid_argument("Tags cannot contain null elements"); } return std::move(tags); }()}, From 789263ea958b724a85ad9fd0f0dc7dbfd0300241 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Nov 2024 17:32:47 -0500 Subject: [PATCH 018/144] Use () to fix linting issue. --- src/log_surgeon/finite_automata/RegexAST.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index d37067ec..57a8ae15 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -638,7 +638,11 @@ class RegexASTCapture : public RegexAST { std::unique_ptr> group_regex_ast, std::unique_ptr tag ) - : m_group_regex_ast{nullptr == group_regex_ast ? throw std::invalid_argument("Group regex AST cannot be null") : std::move(group_regex_ast)}, + : m_group_regex_ast{( + nullptr == group_regex_ast + ? throw std::invalid_argument("Group regex AST cannot be null") + : std::move(group_regex_ast) + )}, m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null") : std::move(tag)} { RegexAST::set_subtree_positive_tags( From 1f15ca712709244a7d61f56e7a31c192e3471934 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Nov 2024 10:08:39 -0500 Subject: [PATCH 019/144] Keep default copy assignment. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 57a8ae15..ed23ece5 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -123,7 +123,7 @@ class RegexAST { protected: RegexAST(RegexAST const& rhs) = default; - auto operator=(RegexAST const& rhs) -> RegexAST& = delete; + auto operator=(RegexAST const& rhs) -> RegexAST& = default; RegexAST(RegexAST&& rhs) noexcept = delete; auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = delete; From 7688c24442930a5e6aad9af8cd2fe0c532138c79 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Nov 2024 10:15:31 -0500 Subject: [PATCH 020/144] Move @throw to constructor docstrings. --- src/log_surgeon/finite_automata/RegexAST.hpp | 7 +++++-- src/log_surgeon/finite_automata/TaggedTransition.hpp | 12 ++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index ed23ece5..acb9ce39 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -625,8 +625,6 @@ class RegexASTMultiplication : public RegexAST { * Represents a capture group AST node. * `m_tag` is always expected to be non-null. * `m_group_regex_ast` is always expected to be non-null. - * @throw std::invalid_argument Thrown if a null tag or group regex AST is passed into the - * constructor. * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). */ template @@ -634,6 +632,11 @@ class RegexASTCapture : public RegexAST { public: ~RegexASTCapture() override = default; + /** + * @param group_regex_ast + * @param tag + * @throw std::invalid_argument if `group_regex_ast` or `tag` are `nullptr`. 
+ */ RegexASTCapture( std::unique_ptr> group_regex_ast, std::unique_ptr tag diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 309fc97d..f1460aa7 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -17,12 +17,16 @@ namespace log_surgeon::finite_automata { /** * Represents an NFA transition indicating that a capture group has been matched. * `m_tag` is always expected to be non-null. - * @throw std::invalid_argument Thrown if a null tag is passed into the constructor. * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). */ template class PositiveTaggedTransition { public: + /** + * @param tag + * @param dest_state + * @throw std::invalid_argument if `tag` is `nullptr`. + */ PositiveTaggedTransition(Tag const* tag, NFAStateType const* dest_state) : m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null") : tag}, m_dest_state{dest_state} {} @@ -51,12 +55,16 @@ class PositiveTaggedTransition { /** * Represents an NFA transition indicating that a capture group has been unmatched. * All tags in `m_tags` are always expected to be non-null. - * @throw std::invalid_argument Thrown if any tag passed into the constructor is null. * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). */ template class NegativeTaggedTransition { public: + /** + * @param tags + * @param dest_state + * @throw std::invalid_argument if any elements in `tags` is `nullptr`. + */ NegativeTaggedTransition(std::vector tags, NFAStateType const* dest_state) : m_tags{[&tags] { if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) { From 486190a0fedd66f70a3715c610670f22ab401c6f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Nov 2024 11:12:34 -0500 Subject: [PATCH 021/144] Do string_viee comparisomn in lexer test. 
--- tests/test-lexer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp index 6c0ee042..dd305a76 100644 --- a/tests/test-lexer.cpp +++ b/tests/test-lexer.cpp @@ -116,7 +116,7 @@ TEST_CASE("Test the Schema class", "[Schema]") { auto* regex_ast_capture = dynamic_cast(regex_ast_cat_ptr->get_right()); REQUIRE(nullptr != regex_ast_capture); - REQUIRE("uID" == string{regex_ast_capture->get_group_name()}); + REQUIRE("uID" == regex_ast_capture->get_group_name()); auto* regex_ast_multiplication_ast = dynamic_cast( regex_ast_capture->get_group_regex_ast().get() From ac75909319c2c2cfba0f32e9afc01889c40407dd Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Nov 2024 11:17:16 -0500 Subject: [PATCH 022/144] Use string_view compares in tag tests. --- tests/test-tag.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index a8b35e99..90264939 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -1,16 +1,13 @@ -#include - #include #include using log_surgeon::finite_automata::Tag; -using std::string; TEST_CASE("Tag operations", "[Tag]") { SECTION("Basic name retrieval works correctly") { Tag const tag{"uID"}; - REQUIRE("uID" == string{tag.get_name()}); + REQUIRE("uID" == tag.get_name()); } SECTION("Empty tag name is handled correctly") { @@ -20,16 +17,16 @@ TEST_CASE("Tag operations", "[Tag]") { SECTION("Special characters in tag names are preserved") { Tag const special_tag{"user.id-123_@"}; - REQUIRE("user.id-123_@" == string{special_tag.get_name()}); + REQUIRE("user.id-123_@" == special_tag.get_name()); } SECTION("Move semantics work correctly") { Tag original_tag{"source"}; Tag moved_tag{std::move(original_tag)}; - REQUIRE("source" == string{moved_tag.get_name()}); + REQUIRE("source" == moved_tag.get_name()); Tag assign_tag{"target"}; assign_tag = Tag{"new_source"}; - REQUIRE("new_source" == string{assign_tag.get_name()}); + 
REQUIRE("new_source" == assign_tag.get_name()); } } From 090f18cae3ead36721fd32a85cdb3e21fec863f0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Nov 2024 11:15:24 -0500 Subject: [PATCH 023/144] Update headers in TaggedTransition.hpp. --- src/log_surgeon/finite_automata/TaggedTransition.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index f1460aa7..c4cfb76a 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -3,13 +3,12 @@ #include #include -#include +#include #include #include #include -#include #include namespace log_surgeon::finite_automata { From c7cfc10b914e54e23085282160bb7de66690ec63 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Nov 2024 11:16:06 -0500 Subject: [PATCH 024/144] Seperate copy and move constructor unit-tests. --- tests/test-tag.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index 90264939..41f8a2ef 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -20,13 +20,15 @@ TEST_CASE("Tag operations", "[Tag]") { REQUIRE("user.id-123_@" == special_tag.get_name()); } - SECTION("Move semantics work correctly") { - Tag original_tag{"source"}; - Tag moved_tag{std::move(original_tag)}; - REQUIRE("source" == moved_tag.get_name()); - + SECTION("Copy constructor works correctly") { Tag assign_tag{"target"}; assign_tag = Tag{"new_source"}; REQUIRE("new_source" == assign_tag.get_name()); } + + SECTION("Move constructor works correctly") { + Tag original_tag{"source"}; + Tag moved_tag{std::move(original_tag)}; + REQUIRE("source" == moved_tag.get_name()); + } } From 91b8b515eacd07ec77f37469c5303daa4175fa38 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Fri, 15 Nov 2024 11:17:37 -0500 Subject: [PATCH 025/144] Use NOTE for class requirements. 
Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/RegexAST.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index acb9ce39..c0c6b04f 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -623,8 +623,9 @@ class RegexASTMultiplication : public RegexAST { /** * Represents a capture group AST node. - * `m_tag` is always expected to be non-null. - * `m_group_regex_ast` is always expected to be non-null. + * NOTE: + * - `m_tag` is always expected to be non-null. + * - `m_group_regex_ast` is always expected to be non-null. * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). */ template From fcb1a76fb34e2f2c08d065dbe29dc7620f22d791 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Fri, 15 Nov 2024 11:17:44 -0500 Subject: [PATCH 026/144] Use NOTE for class requirements. Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/TaggedTransition.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index c4cfb76a..2d530d7b 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -15,7 +15,7 @@ namespace log_surgeon::finite_automata { /** * Represents an NFA transition indicating that a capture group has been matched. - * `m_tag` is always expected to be non-null. + * NOTE: `m_tag` is always expected to be non-null. * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). 
*/ template From 9b09e1991c70206793f809661e319f7ad8edfeae Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Fri, 15 Nov 2024 11:17:50 -0500 Subject: [PATCH 027/144] Use NOTE for class requirements. Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/TaggedTransition.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 2d530d7b..86fe7a39 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -53,7 +53,7 @@ class PositiveTaggedTransition { /** * Represents an NFA transition indicating that a capture group has been unmatched. - * All tags in `m_tags` are always expected to be non-null. + * NOTE: All tags in `m_tags` are always expected to be non-null. * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). */ template From 583bea2f9ada9d9dbbc0931cefefec9fda0b5c3d Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Fri, 15 Nov 2024 15:37:50 -0500 Subject: [PATCH 028/144] Replace the integer capture group tag ID with a dedicated 'Tag' class. 
(#48) Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- CMakeLists.txt | 1 + src/log_surgeon/SchemaParser.cpp | 10 +- src/log_surgeon/SchemaParser.hpp | 20 ---- src/log_surgeon/finite_automata/RegexAST.hpp | 94 ++++++++++--------- src/log_surgeon/finite_automata/RegexNFA.hpp | 22 +++-- .../finite_automata/RegexNFAState.hpp | 4 +- src/log_surgeon/finite_automata/Tag.hpp | 20 ++++ .../finite_automata/TaggedTransition.hpp | 53 ++++++++--- tests/CMakeLists.txt | 3 +- tests/test-NFA.cpp | 21 +++-- tests/test-lexer.cpp | 38 ++++---- tests/test-tag.cpp | 34 +++++++ 12 files changed, 193 insertions(+), 127 deletions(-) create mode 100644 src/log_surgeon/finite_automata/Tag.hpp create mode 100644 tests/test-tag.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 50e45392..e76ecb8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,7 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/RegexNFA.hpp src/log_surgeon/finite_automata/RegexNFAState.hpp src/log_surgeon/finite_automata/RegexNFAStateType.hpp + src/log_surgeon/finite_automata/Tag.hpp src/log_surgeon/finite_automata/TaggedTransition.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp diff --git a/src/log_surgeon/SchemaParser.cpp b/src/log_surgeon/SchemaParser.cpp index d74167fa..56760262 100644 --- a/src/log_surgeon/SchemaParser.cpp +++ b/src/log_surgeon/SchemaParser.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -161,13 +162,12 @@ auto SchemaParser::existing_schema_rule(NonTerminal* m) -> unique_ptr return schema_ast; } -auto SchemaParser::regex_capture_rule(NonTerminal* m) -> std::unique_ptr { - auto* r4 = dynamic_cast(m->non_terminal_cast(3)->get_parser_ast().get()); +static auto regex_capture_rule(NonTerminal const* m) -> std::unique_ptr { + auto const* r4 = dynamic_cast(m->non_terminal_cast(3)->get_parser_ast().get()); auto& r6 = 
m->non_terminal_cast(5)->get_parser_ast()->get>(); return std::make_unique(make_unique( - r4->m_name, std::move(r6), - m_capture_group_id_generator.assign_next_id() + std::make_unique(r4->m_name) )); } @@ -622,7 +622,7 @@ void SchemaParser::add_productions() { add_production( "Literal", {"Lparen", "QuestionMark", "Langle", "Identifier", "Rangle", "Regex", "Rparen"}, - [this](NonTerminal* m) { return regex_capture_rule(m); } + regex_capture_rule ); add_production("Literal", {"Lparen", "Regex", "Rparen"}, regex_middle_identity_rule); for (auto const& [special_regex_char, special_regex_name] : m_special_regex_characters) { diff --git a/src/log_surgeon/SchemaParser.hpp b/src/log_surgeon/SchemaParser.hpp index 004ec495..c5081287 100644 --- a/src/log_surgeon/SchemaParser.hpp +++ b/src/log_surgeon/SchemaParser.hpp @@ -8,17 +8,6 @@ #include namespace log_surgeon { -/** - * Class for generating monotonically increasing integer IDs. - */ -class UniqueIdGenerator { -public: - [[nodiscard]] auto assign_next_id() -> uint32_t { return m_next_id++; } - -private: - uint32_t m_next_id{0}; -}; - // ASTs used in SchemaParser AST class SchemaAST : public ParserAST { public: @@ -113,13 +102,6 @@ class SchemaParser : public LALR1Parser< */ auto existing_schema_rule(NonTerminal* m) -> std::unique_ptr; - /** - * A semantic rule for regex capture groups that needs access to `m_capture_group_id_generator`. - * @param m - * @return A unique pointer to the parsed regex capture group. 
- */ - auto regex_capture_rule(NonTerminal* m) -> std::unique_ptr; - /** * After lexing half of the buffer, reads into that half of the buffer and * changes variables accordingly @@ -146,8 +128,6 @@ class SchemaParser : public LALR1Parser< auto generate_schema_ast(Reader& reader) -> std::unique_ptr; static inline std::unordered_map m_special_regex_characters; - - UniqueIdGenerator m_capture_group_id_generator; }; } // namespace log_surgeon diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index ee0cc7e3..c0c6b04f 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ #include #include +#include #include namespace log_surgeon::finite_automata { @@ -81,19 +83,23 @@ class RegexAST { */ [[nodiscard]] virtual auto serialize() const -> std::u32string = 0; - [[nodiscard]] auto get_subtree_positive_tags() const -> std::set const& { + [[nodiscard]] auto get_subtree_positive_tags() const -> std::vector const& { return m_subtree_positive_tags; } - auto set_subtree_positive_tags(std::set subtree_positive_tags) -> void { + auto set_subtree_positive_tags(std::vector subtree_positive_tags) -> void { m_subtree_positive_tags = std::move(subtree_positive_tags); } - auto add_subtree_positive_tags(std::set subtree_positive_tags) -> void { - m_subtree_positive_tags.merge(subtree_positive_tags); + auto add_subtree_positive_tags(std::vector const& subtree_positive_tags) -> void { + m_subtree_positive_tags.insert( + m_subtree_positive_tags.end(), + subtree_positive_tags.cbegin(), + subtree_positive_tags.cend() + ); } - auto set_negative_tags(std::set negative_tags) -> void { + auto set_negative_tags(std::vector negative_tags) -> void { m_negative_tags = std::move(negative_tags); } @@ -118,8 +124,8 @@ class RegexAST { protected: RegexAST(RegexAST const& rhs) = default; auto operator=(RegexAST 
const& rhs) -> RegexAST& = default; - RegexAST(RegexAST&& rhs) noexcept = default; - auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = default; + RegexAST(RegexAST&& rhs) noexcept = delete; + auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = delete; [[nodiscard]] auto serialize_negative_tags() const -> std::u32string { if (m_negative_tags.empty()) { @@ -127,8 +133,8 @@ class RegexAST { } auto const transformed_negative_tags - = m_negative_tags | std::ranges::views::transform([](uint32_t tag) { - return fmt::format("<~{}>", tag); + = m_negative_tags | std::ranges::views::transform([](Tag const* tag) { + return fmt::format("<~{}>", tag->get_name()); }); auto const negative_tags_string = fmt::format("{}", fmt::join(transformed_negative_tags, "")); @@ -140,8 +146,8 @@ class RegexAST { } private: - std::set m_subtree_positive_tags; - std::set m_negative_tags; + std::vector m_subtree_positive_tags; + std::vector m_negative_tags; }; /** @@ -433,10 +439,6 @@ class RegexASTOr : public RegexAST { m_left(std::unique_ptr>(rhs.m_left->clone())), m_right(std::unique_ptr>(rhs.m_right->clone())) {} - auto operator=(RegexASTOr const& rhs) -> RegexASTOr& = default; - RegexASTOr(RegexASTOr&& rhs) noexcept = default; - auto operator=(RegexASTOr&& rhs) noexcept -> RegexASTOr& = default; - /** * Used for cloning a unique_pointer of type RegexASTOr * @return RegexASTOr* @@ -500,10 +502,6 @@ class RegexASTCat : public RegexAST { m_left(std::unique_ptr>(rhs.m_left->clone())), m_right(std::unique_ptr>(rhs.m_right->clone())) {} - auto operator=(RegexASTCat const& rhs) -> RegexASTCat& = default; - RegexASTCat(RegexASTCat&& rhs) noexcept = default; - auto operator=(RegexASTCat&& rhs) noexcept -> RegexASTCat& = default; - /** * Used for cloning a unique_pointer of type RegexASTCat * @return RegexASTCat* @@ -569,10 +567,6 @@ class RegexASTMultiplication : public RegexAST { m_min(rhs.m_min), m_max(rhs.m_max) {} - auto operator=(RegexASTMultiplication const& rhs) -> 
RegexASTMultiplication& = default; - RegexASTMultiplication(RegexASTMultiplication&& rhs) noexcept = default; - auto operator=(RegexASTMultiplication&& rhs) noexcept -> RegexASTMultiplication& = default; - /** * Used for cloning a unique_pointer of type RegexASTMultiplication * @return RegexASTMultiplication* @@ -627,39 +621,49 @@ class RegexASTMultiplication : public RegexAST { uint32_t m_max; }; +/** + * Represents a capture group AST node. + * NOTE: + * - `m_tag` is always expected to be non-null. + * - `m_group_regex_ast` is always expected to be non-null. + * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). + */ template class RegexASTCapture : public RegexAST { public: ~RegexASTCapture() override = default; + /** + * @param group_regex_ast + * @param tag + * @throw std::invalid_argument if `group_regex_ast` or `tag` are `nullptr`. + */ RegexASTCapture( - std::string group_name, std::unique_ptr> group_regex_ast, - uint32_t const tag + std::unique_ptr tag ) - : m_group_name(std::move(group_name)), - m_group_regex_ast(std::move(group_regex_ast)), - m_tag(tag) { + : m_group_regex_ast{( + nullptr == group_regex_ast + ? throw std::invalid_argument("Group regex AST cannot be null") + : std::move(group_regex_ast) + )}, + m_tag{nullptr == tag ? 
throw std::invalid_argument("Tag cannot be null") + : std::move(tag)} { RegexAST::set_subtree_positive_tags( m_group_regex_ast->get_subtree_positive_tags() ); - RegexAST::add_subtree_positive_tags({m_tag}); + RegexAST::add_subtree_positive_tags({m_tag.get()}); } RegexASTCapture(RegexASTCapture const& rhs) - : RegexAST(rhs), - m_group_name(rhs.m_group_name), - m_group_regex_ast( + : RegexAST{rhs}, + m_group_regex_ast{ std::unique_ptr>(rhs.m_group_regex_ast->clone()) - ), - m_tag(rhs.m_tag) { + }, + m_tag{std::make_unique(*rhs.m_tag)} { RegexAST::set_subtree_positive_tags(rhs.get_subtree_positive_tags()); } - auto operator=(RegexASTCapture const& rhs) -> RegexASTCapture& = default; - RegexASTCapture(RegexASTCapture&& rhs) noexcept = default; - auto operator=(RegexASTCapture&& rhs) noexcept -> RegexASTCapture& = default; - /** * Used for cloning a `unique_pointer` of type `RegexASTCapture`. * @return RegexASTCapture* @@ -697,19 +701,16 @@ class RegexASTCapture : public RegexAST { [[nodiscard]] auto serialize() const -> std::u32string override; - [[nodiscard]] auto get_group_name() const -> std::string const& { return m_group_name; } + [[nodiscard]] auto get_group_name() const -> std::string_view { return m_tag->get_name(); } [[nodiscard]] auto get_group_regex_ast( ) const -> std::unique_ptr> const& { return m_group_regex_ast; } - [[nodiscard]] auto get_tag() const -> uint32_t { return m_tag; } - private: - std::string m_group_name; std::unique_ptr> m_group_regex_ast; - uint32_t m_tag; + std::unique_ptr m_tag; }; template @@ -894,16 +895,17 @@ template void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAStateType* end_state) const { auto* state_with_positive_tagged_transition - = nfa->new_state_with_positive_tagged_transition(m_tag, end_state); + = nfa->new_state_with_positive_tagged_transition(m_tag.get(), end_state); m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, state_with_positive_tagged_transition); } template [[nodiscard]] auto RegexASTCapture::serialize() 
const -> std::u32string { + auto const tag_name_u32 = std::u32string(m_tag->get_name().cbegin(), m_tag->get_name().cend()); return fmt::format( U"({})<{}>{}", - nullptr != m_group_regex_ast ? m_group_regex_ast->serialize() : U"null", - m_tag, + m_group_regex_ast->serialize(), + tag_name_u32, RegexAST::serialize_negative_tags() ); } diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 54ac5bdc..7919a0c6 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -26,7 +26,7 @@ class RegexNFA { public: using StateVec = std::vector; - explicit RegexNFA(std::vector> const& rules); + explicit RegexNFA(std::vector> rules); /** * Creates a unique_ptr for an NFA state with no tagged transitions and adds it to `m_states`. @@ -42,7 +42,7 @@ class RegexNFA { * @return NFAStateType* */ [[nodiscard]] auto new_state_with_positive_tagged_transition( - uint32_t tag, + Tag const* tag, NFAStateType const* dest_state ) -> NFAStateType*; @@ -54,7 +54,7 @@ class RegexNFA { * @return NFAStateType* */ [[nodiscard]] auto new_state_with_negative_tagged_transition( - std::set tags, + std::vector tags, NFAStateType const* dest_state ) -> NFAStateType*; @@ -80,12 +80,16 @@ class RegexNFA { private: std::vector> m_states; NFAStateType* m_root; + // Store the rules locally as they contain information needed by the NFA. E.g., transitions in + // the NFA point to tags in the rule ASTs. 
+ std::vector> m_rules; }; template -RegexNFA::RegexNFA(std::vector> const& rules) - : m_root{new_state()} { - for (auto const& rule : rules) { +RegexNFA::RegexNFA(std::vector> rules) + : m_root{new_state()}, + m_rules{std::move(rules)} { + for (auto const& rule : m_rules) { rule.add_to_nfa(this); } } @@ -98,7 +102,7 @@ auto RegexNFA::new_state() -> NFAStateType* { template auto RegexNFA::new_state_with_positive_tagged_transition( - uint32_t const tag, + Tag const* tag, NFAStateType const* dest_state ) -> NFAStateType* { m_states.emplace_back(std::make_unique(tag, dest_state)); @@ -107,10 +111,10 @@ auto RegexNFA::new_state_with_positive_tagged_transition( template auto RegexNFA::new_state_with_negative_tagged_transition( - std::set tags, + std::vector tags, NFAStateType const* dest_state ) -> NFAStateType* { - m_states.emplace_back(std::make_unique(tags, dest_state)); + m_states.emplace_back(std::make_unique(std::move(tags), dest_state)); return m_states.back().get(); } diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index f2a27898..dd21557b 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -31,10 +31,10 @@ class RegexNFAState { RegexNFAState() = default; - RegexNFAState(uint32_t const tag, RegexNFAState const* dest_state) + RegexNFAState(Tag const* tag, RegexNFAState const* dest_state) : m_positive_tagged_transitions{{tag, dest_state}} {} - RegexNFAState(std::set tags, RegexNFAState const* dest_state) + RegexNFAState(std::vector tags, RegexNFAState const* dest_state) : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {} auto set_accepting(bool accepting) -> void { m_accepting = accepting; } diff --git a/src/log_surgeon/finite_automata/Tag.hpp b/src/log_surgeon/finite_automata/Tag.hpp new file mode 100644 index 00000000..3a3b4d7f --- /dev/null +++ b/src/log_surgeon/finite_automata/Tag.hpp @@ 
-0,0 +1,20 @@ +#ifndef LOG_SURGEON_FINITE_AUTOMATA_TAG +#define LOG_SURGEON_FINITE_AUTOMATA_TAG + +#include +#include +#include + +namespace log_surgeon::finite_automata { +class Tag { +public: + explicit Tag(std::string name) : m_name{std::move(name)} {} + + [[nodiscard]] auto get_name() const -> std::string_view { return m_name; } + +private: + std::string m_name; +}; +} // namespace log_surgeon::finite_automata + +#endif // LOG_SURGEON_FINITE_AUTOMATA_TAG diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 614841a7..86fe7a39 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -3,24 +3,33 @@ #include #include -#include +#include #include #include #include -#include +#include namespace log_surgeon::finite_automata { + +/** + * Represents an NFA transition indicating that a capture group has been matched. + * NOTE: `m_tag` is always expected to be non-null. + * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). + */ template class PositiveTaggedTransition { public: - PositiveTaggedTransition(uint32_t const tag, NFAStateType const* dest_state) - : m_tag{tag}, + /** + * @param tag + * @param dest_state + * @throw std::invalid_argument if `tag` is `nullptr`. + */ + PositiveTaggedTransition(Tag const* tag, NFAStateType const* dest_state) + : m_tag{nullptr == tag ? 
throw std::invalid_argument("Tag cannot be null") : tag}, m_dest_state{dest_state} {} - [[nodiscard]] auto get_tag() const -> uint32_t { return m_tag; } - [[nodiscard]] auto get_dest_state() const -> NFAStateType const* { return m_dest_state; } /** @@ -34,23 +43,36 @@ class PositiveTaggedTransition { if (state_id_it == state_ids.end()) { return std::nullopt; } - return fmt::format("{}[{}]", state_id_it->second, m_tag); + return fmt::format("{}[{}]", state_id_it->second, m_tag->get_name()); } private: - uint32_t m_tag; + Tag const* m_tag; NFAStateType const* m_dest_state; }; +/** + * Represents an NFA transition indicating that a capture group has been unmatched. + * NOTE: All tags in `m_tags` are always expected to be non-null. + * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). + */ template class NegativeTaggedTransition { public: - NegativeTaggedTransition(std::set tags, NFAStateType const* dest_state) - : m_tags{std::move(tags)}, + /** + * @param tags + * @param dest_state + * @throw std::invalid_argument if any elements in `tags` is `nullptr`. 
+ */ + NegativeTaggedTransition(std::vector tags, NFAStateType const* dest_state) + : m_tags{[&tags] { + if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) { + throw std::invalid_argument("Tags cannot contain null elements"); + } + return std::move(tags); + }()}, m_dest_state{dest_state} {} - [[nodiscard]] auto get_tags() const -> std::set const& { return m_tags; } - [[nodiscard]] auto get_dest_state() const -> NFAStateType const* { return m_dest_state; } /** @@ -64,11 +86,14 @@ class NegativeTaggedTransition { if (state_id_it == state_ids.end()) { return std::nullopt; } - return fmt::format("{}[{}]", state_id_it->second, fmt::join(m_tags, ",")); + + auto const tag_names = m_tags | std::ranges::views::transform(&Tag::get_name); + + return fmt::format("{}[{}]", state_id_it->second, fmt::join(tag_names, ",")); } private: - std::set m_tags; + std::vector m_tags; NFAStateType const* m_dest_state; }; } // namespace log_surgeon::finite_automata diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b7afd1f1..d150252f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ set( ../src/log_surgeon/finite_automata/RegexNFA.hpp ../src/log_surgeon/finite_automata/RegexNFAState.hpp ../src/log_surgeon/finite_automata/RegexNFAStateType.hpp + ../src/log_surgeon/finite_automata/Tag.hpp ../src/log_surgeon/finite_automata/TaggedTransition.hpp ../src/log_surgeon/LALR1Parser.cpp ../src/log_surgeon/LALR1Parser.hpp @@ -20,7 +21,7 @@ set( ../src/log_surgeon/Token.hpp ) -set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp) +set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-tag.cpp) add_executable(unit-test ${SOURCES_LOG_SURGEON} ${SOURCES_TESTS}) target_link_libraries(unit-test PRIVATE Catch2::Catch2WithMain log_surgeon::log_surgeon) diff --git a/tests/test-NFA.cpp b/tests/test-NFA.cpp index 0223c9bb..c7a599b2 100644 --- a/tests/test-NFA.cpp +++ b/tests/test-NFA.cpp @@ -55,17 +55,18 @@ TEST_CASE("Test NFA", "[NFA]") { 
"epsilon_transitions={}," "positive_tagged_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "2:byte_transitions={}," - "epsilon_transitions={}," - "positive_tagged_transitions={}," - "negative_tagged_transition={5[0,1,2,3]}\n"; + expected_serialized_nfa + += "2:byte_transitions={}," + "epsilon_transitions={}," + "positive_tagged_transitions={}," + "negative_tagged_transition={5[letter1,letter2,letter,containerID]}\n"; expected_serialized_nfa += "3:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={6[0]}," + "positive_tagged_transitions={6[letter1]}," "negative_tagged_transition={}\n"; expected_serialized_nfa += "4:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={7[1]}," + "positive_tagged_transitions={7[letter2]}," "negative_tagged_transition={}\n"; expected_serialized_nfa += "5:accepting_tag=0,byte_transitions={}," "epsilon_transitions={}," @@ -74,14 +75,14 @@ TEST_CASE("Test NFA", "[NFA]") { expected_serialized_nfa += "6:byte_transitions={}," "epsilon_transitions={}," "positive_tagged_transitions={}," - "negative_tagged_transition={8[1]}\n"; + "negative_tagged_transition={8[letter2]}\n"; expected_serialized_nfa += "7:byte_transitions={}," "epsilon_transitions={}," "positive_tagged_transitions={}," - "negative_tagged_transition={8[0]}\n"; + "negative_tagged_transition={8[letter1]}\n"; expected_serialized_nfa += "8:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={9[2]}," + "positive_tagged_transitions={9[letter]}," "negative_tagged_transition={}\n"; expected_serialized_nfa += "9:byte_transitions={B-->10}," "epsilon_transitions={}," @@ -95,7 +96,7 @@ TEST_CASE("Test NFA", "[NFA]") { expected_serialized_nfa += "11:byte_transitions={0-->11,1-->11,2-->11,3-->11,4-->11,5-->11,6-->" "11,7-->11,8-->11,9-->11}," "epsilon_transitions={}," - "positive_tagged_transitions={12[3]}," + "positive_tagged_transitions={12[containerID]}," 
"negative_tagged_transition={}\n"; expected_serialized_nfa += "12:byte_transitions={C-->5}," "epsilon_transitions={}," diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp index abfa460e..dd305a76 100644 --- a/tests/test-lexer.cpp +++ b/tests/test-lexer.cpp @@ -1,7 +1,5 @@ #include -#include #include -#include #include #include #include @@ -152,13 +150,13 @@ TEST_CASE("Test the Schema class", "[Schema]") { "?\\d+" ")C" ")", - U"(Z<~0><~1><~2><~3>)|(" + U"(Z<~letter1><~letter2><~letter><~containerID>)|(" "A(" - "(((a)|(b))<0><~1>)|" - "(((c)|(d))<1><~0>)" - ")<2>B(" + "(((a)|(b))<~letter2>)|" + "(((c)|(d))<~letter1>)" + ")B(" "([0-9]){1,inf}" - ")<3>C" + ")C" ")" // clang-format on ); @@ -172,13 +170,13 @@ TEST_CASE("Test the Schema class", "[Schema]") { test_regex_ast("capture:a+", U"(a){1,inf}"); // Repetition with capture groups untagged and tagged AST are different - test_regex_ast("capture:(?a){0,10}", U"(<~0>)|(((a)<0>){1,10})"); - test_regex_ast("capture:(?a){5,10}", U"((a)<0>){5,10}"); - test_regex_ast("capture:(?a)*", U"(<~0>)|(((a)<0>){1,inf})"); - test_regex_ast("capture:(?a)+", U"((a)<0>){1,inf}"); + test_regex_ast("capture:(?a){0,10}", U"(<~letter>)|(((a)){1,10})"); + test_regex_ast("capture:(?a){5,10}", U"((a)){5,10}"); + test_regex_ast("capture:(?a)*", U"(<~letter>)|(((a)){1,inf})"); + test_regex_ast("capture:(?a)+", U"((a)){1,inf}"); // Capture group with repetition - test_regex_ast("capture:(?a{0,10})", U"(()|((a){1,10}))<0>"); + test_regex_ast("capture:(?a{0,10})", U"(()|((a){1,10}))"); // Complex repetition test_regex_ast( @@ -196,16 +194,16 @@ TEST_CASE("Test the Schema class", "[Schema]") { "){0,10}" ")", U"(" - U"(<~0><~1>)|((" - U"((a)<0><~1>)|" - U"((b)<1><~0>)" + U"(<~letterA><~letterB>)|((" + U"((a)<~letterB>)|" + U"((b)<~letterA>)" U"){1,inf})" - U"<~2><~3>)|(" - U"(<~2><~3>)|((" - U"((c)<2><~3>)|" - U"((d)<3><~2>)" + U"<~letterC><~letterD>)|(" + U"(<~letterC><~letterD>)|((" + U"((c)<~letterD>)|" + U"((d)<~letterC>)" U"){1,10})" - 
U"<~0><~1>)" + U"<~letterA><~letterB>)" // clang-format on ); } diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp new file mode 100644 index 00000000..41f8a2ef --- /dev/null +++ b/tests/test-tag.cpp @@ -0,0 +1,34 @@ +#include + +#include + +using log_surgeon::finite_automata::Tag; + +TEST_CASE("Tag operations", "[Tag]") { + SECTION("Basic name retrieval works correctly") { + Tag const tag{"uID"}; + REQUIRE("uID" == tag.get_name()); + } + + SECTION("Empty tag name is handled correctly") { + Tag const empty_tag{""}; + REQUIRE(empty_tag.get_name().empty()); + } + + SECTION("Special characters in tag names are preserved") { + Tag const special_tag{"user.id-123_@"}; + REQUIRE("user.id-123_@" == special_tag.get_name()); + } + + SECTION("Copy constructor works correctly") { + Tag assign_tag{"target"}; + assign_tag = Tag{"new_source"}; + REQUIRE("new_source" == assign_tag.get_name()); + } + + SECTION("Move constructor works correctly") { + Tag original_tag{"source"}; + Tag moved_tag{std::move(original_tag)}; + REQUIRE("source" == moved_tag.get_name()); + } +} From 75aecc44a4b3d5300dd1444c57ac259de70109c4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 10:41:52 -0500 Subject: [PATCH 029/144] Update install-catch2.sh to compile catch2 with c++17. --- tools/deps-install/ubuntu/install-catch2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deps-install/ubuntu/install-catch2.sh b/tools/deps-install/ubuntu/install-catch2.sh index bb5ebfbe..aa063d72 100755 --- a/tools/deps-install/ubuntu/install-catch2.sh +++ b/tools/deps-install/ubuntu/install-catch2.sh @@ -69,7 +69,7 @@ fi # Build cd "$extracted_dir" -cmake -B build -S . -DBUILD_TESTING=OFF +cmake -B build -S . 
-DBUILD_TESTING=OFF -DCMAKE_CXX_STANDARD=17 cmake --build build --parallel "$num_cpus" # Check if checkinstall is installed From ad2b6c8dc8333f6a598aa7990d0ab3d3bb2916a9 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Mon, 18 Nov 2024 10:51:42 -0500 Subject: [PATCH 030/144] Update Catch2 install script to use C++17. (#49) --- tools/deps-install/ubuntu/install-catch2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deps-install/ubuntu/install-catch2.sh b/tools/deps-install/ubuntu/install-catch2.sh index bb5ebfbe..aa063d72 100755 --- a/tools/deps-install/ubuntu/install-catch2.sh +++ b/tools/deps-install/ubuntu/install-catch2.sh @@ -69,7 +69,7 @@ fi # Build cd "$extracted_dir" -cmake -B build -S . -DBUILD_TESTING=OFF +cmake -B build -S . -DBUILD_TESTING=OFF -DCMAKE_CXX_STANDARD=17 cmake --build build --parallel "$num_cpus" # Check if checkinstall is installed From 507a7d3fa0a9ce9b52204c6f4efa5349ae5ee755 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:10:37 -0500 Subject: [PATCH 031/144] Loop over end_transitions correctly. --- src/log_surgeon/Lexer.tpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/Lexer.tpp b/src/log_surgeon/Lexer.tpp index 43750036..bdac76a1 100644 --- a/src/log_surgeon/Lexer.tpp +++ b/src/log_surgeon/Lexer.tpp @@ -411,7 +411,7 @@ auto Lexer::epsilon_closure(NFAStateType const* stat stack.push(positive_tagged_start_transition.get_dest_state()); } for (auto const& positive_tagged_end_transition : - current_state->get_positive_tagged_start_transitions()) + current_state->get_positive_tagged_end_transitions()) { stack.push(positive_tagged_end_transition.get_dest_state()); } From 34c227b74268e0ccea2847187c86dacfc408b89a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:29:33 -0500 Subject: [PATCH 032/144] Add TagPositions class. 
--- src/log_surgeon/finite_automata/Tag.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/log_surgeon/finite_automata/Tag.hpp b/src/log_surgeon/finite_automata/Tag.hpp index 3a3b4d7f..7fa5ceca 100644 --- a/src/log_surgeon/finite_automata/Tag.hpp +++ b/src/log_surgeon/finite_automata/Tag.hpp @@ -15,6 +15,16 @@ class Tag { private: std::string m_name; }; + +class TagPositions { +public: + explicit TagPositions(Tag const* tag) : m_tag{tag} {} + +private: + Tag const* m_tag; + std::vector start_positions; + std::vector end_positions; +}; } // namespace log_surgeon::finite_automata #endif // LOG_SURGEON_FINITE_AUTOMATA_TAG From 27c8560a578e7d5dc5b8e5496c393c43798a5b0a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:38:32 -0500 Subject: [PATCH 033/144] Remove new class, going to add it later. --- src/log_surgeon/finite_automata/Tag.hpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/log_surgeon/finite_automata/Tag.hpp b/src/log_surgeon/finite_automata/Tag.hpp index 7fa5ceca..3a3b4d7f 100644 --- a/src/log_surgeon/finite_automata/Tag.hpp +++ b/src/log_surgeon/finite_automata/Tag.hpp @@ -15,16 +15,6 @@ class Tag { private: std::string m_name; }; - -class TagPositions { -public: - explicit TagPositions(Tag const* tag) : m_tag{tag} {} - -private: - Tag const* m_tag; - std::vector start_positions; - std::vector end_positions; -}; } // namespace log_surgeon::finite_automata #endif // LOG_SURGEON_FINITE_AUTOMATA_TAG From 86caa9bff25fe8a44c24bdafc699baf479fdaceb Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:43:18 -0500 Subject: [PATCH 034/144] Add const back in. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 10 +++++----- src/log_surgeon/finite_automata/RegexNFA.hpp | 16 ++++++++-------- .../finite_automata/RegexNFAState.hpp | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 5b5a82db..6c98109f 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -87,11 +87,11 @@ class RegexAST { return m_subtree_positive_tags; } - auto set_subtree_positive_tags(std::vector subtree_positive_tags) -> void { + auto set_subtree_positive_tags(std::vector subtree_positive_tags) -> void { m_subtree_positive_tags = std::move(subtree_positive_tags); } - auto add_subtree_positive_tags(std::vector const& subtree_positive_tags) -> void { + auto add_subtree_positive_tags(std::vector const& subtree_positive_tags) -> void { m_subtree_positive_tags.insert( m_subtree_positive_tags.end(), subtree_positive_tags.cbegin(), @@ -99,7 +99,7 @@ class RegexAST { ); } - auto set_negative_tags(std::vector negative_tags) -> void { + auto set_negative_tags(std::vector negative_tags) -> void { m_negative_tags = std::move(negative_tags); } @@ -146,8 +146,8 @@ class RegexAST { } private: - std::vector m_subtree_positive_tags; - std::vector m_negative_tags; + std::vector m_subtree_positive_tags; + std::vector m_negative_tags; }; /** diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 9fa0112b..c35aa83a 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -42,8 +42,8 @@ class RegexNFA { * @return NFAStateType* */ [[nodiscard]] auto new_state_with_positive_tagged_transition( - Tag* tag, - NFAStateType* dest_state + Tag const* tag, + NFAStateType const* dest_state ) -> NFAStateType*; /** @@ -54,8 +54,8 @@ class RegexNFA { * @return NFAStateType* */ [[nodiscard]] auto 
new_state_with_negative_tagged_transition( - std::vector tags, - NFAStateType* dest_state + std::vector tags, + NFAStateType const* dest_state ) -> NFAStateType*; /** @@ -102,8 +102,8 @@ auto RegexNFA::new_state() -> NFAStateType* { template auto RegexNFA::new_state_with_positive_tagged_transition( - Tag* tag, - NFAStateType* dest_state + Tag const* tag, + NFAStateType const* dest_state ) -> NFAStateType* { m_states.emplace_back(std::make_unique(tag, dest_state)); return m_states.back().get(); @@ -111,8 +111,8 @@ auto RegexNFA::new_state_with_positive_tagged_transition( template auto RegexNFA::new_state_with_negative_tagged_transition( - std::vector tags, - NFAStateType* dest_state + std::vector tags, + NFAStateType const* dest_state ) -> NFAStateType* { m_states.emplace_back(std::make_unique(std::move(tags), dest_state)); return m_states.back().get(); diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index 139b179b..94df249f 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -31,10 +31,10 @@ class RegexNFAState { RegexNFAState() = default; - RegexNFAState(Tag* tag, RegexNFAState* dest_state) + RegexNFAState(Tag const* tag, RegexNFAState* dest_state) : m_positive_tagged_end_transitions{{tag, dest_state}} {} - RegexNFAState(std::vector tags, RegexNFAState* dest_state) + RegexNFAState(std::vector tags, RegexNFAState* dest_state) : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {} auto set_accepting(bool accepting) -> void { m_accepting = accepting; } @@ -49,7 +49,7 @@ class RegexNFAState { return m_matching_variable_id; } - auto add_positive_tagged_start_transition(Tag* tag, RegexNFAState* dest_state) -> void { + auto add_positive_tagged_start_transition(Tag const* tag, RegexNFAState* dest_state) -> void { m_positive_tagged_start_transitions.emplace_back(tag, dest_state); } From 
338638e0514d88f063754b66b3fce172d9425d42 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:45:53 -0500 Subject: [PATCH 035/144] Add more const back in. --- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- src/log_surgeon/finite_automata/TaggedTransition.hpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 6c98109f..21691d4b 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -83,7 +83,7 @@ class RegexAST { */ [[nodiscard]] virtual auto serialize() const -> std::u32string = 0; - [[nodiscard]] auto get_subtree_positive_tags() const -> std::vector const& { + [[nodiscard]] auto get_subtree_positive_tags() const -> std::vector const& { return m_subtree_positive_tags; } diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index f04143da..f1686c80 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -26,7 +26,7 @@ class PositiveTaggedTransition { * @param dest_state * @throw std::invalid_argument if `tag` is `nullptr`. */ - PositiveTaggedTransition(Tag* tag, NFAStateType const* dest_state) + PositiveTaggedTransition(Tag const* tag, NFAStateType const* dest_state) : m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null") : tag}, m_dest_state{dest_state} {} @@ -47,7 +47,7 @@ class PositiveTaggedTransition { } private: - Tag* m_tag; + Tag const* m_tag; NFAStateType const* m_dest_state; }; @@ -64,7 +64,7 @@ class NegativeTaggedTransition { * @param dest_state * @throw std::invalid_argument if any elements in `tags` is `nullptr`. 
*/ - NegativeTaggedTransition(std::vector tags, NFAStateType* dest_state) + NegativeTaggedTransition(std::vector tags, NFAStateType* dest_state) : m_tags{[&tags] { if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) { throw std::invalid_argument("Tags cannot contain null elements"); @@ -93,7 +93,7 @@ class NegativeTaggedTransition { } private: - std::vector m_tags; + std::vector m_tags; NFAStateType* m_dest_state; }; } // namespace log_surgeon::finite_automata From a742601f28162bb010a87ee9c2d3e75c9f020702 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:46:31 -0500 Subject: [PATCH 036/144] Add more const back in. --- src/log_surgeon/finite_automata/TaggedTransition.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index f1686c80..beedd423 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -94,7 +94,7 @@ class NegativeTaggedTransition { private: std::vector m_tags; - NFAStateType* m_dest_state; + NFAStateType const* m_dest_state; }; } // namespace log_surgeon::finite_automata From d3587134daa5765ed3a61f336a8cdec7525a05cd Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:47:58 -0500 Subject: [PATCH 037/144] Linter. 
--- src/log_surgeon/Lexer.tpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/Lexer.tpp b/src/log_surgeon/Lexer.tpp index bdac76a1..d72f11ef 100644 --- a/src/log_surgeon/Lexer.tpp +++ b/src/log_surgeon/Lexer.tpp @@ -411,7 +411,7 @@ auto Lexer::epsilon_closure(NFAStateType const* stat stack.push(positive_tagged_start_transition.get_dest_state()); } for (auto const& positive_tagged_end_transition : - current_state->get_positive_tagged_end_transitions()) + current_state->get_positive_tagged_end_transitions()) { stack.push(positive_tagged_end_transition.get_dest_state()); } From 43870ea84bd6bb4d602082c541d07a869634d1c7 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Nov 2024 11:49:42 -0500 Subject: [PATCH 038/144] Add more const back in. --- src/log_surgeon/finite_automata/RegexNFAState.hpp | 4 ++-- src/log_surgeon/finite_automata/TaggedTransition.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index 94df249f..61ab42c4 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -31,10 +31,10 @@ class RegexNFAState { RegexNFAState() = default; - RegexNFAState(Tag const* tag, RegexNFAState* dest_state) + RegexNFAState(Tag const* tag, RegexNFAState const* dest_state) : m_positive_tagged_end_transitions{{tag, dest_state}} {} - RegexNFAState(std::vector tags, RegexNFAState* dest_state) + RegexNFAState(std::vector tags, RegexNFAState const* dest_state) : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {} auto set_accepting(bool accepting) -> void { m_accepting = accepting; } diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index beedd423..86fe7a39 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ 
b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -64,7 +64,7 @@ class NegativeTaggedTransition { * @param dest_state * @throw std::invalid_argument if any elements in `tags` is `nullptr`. */ - NegativeTaggedTransition(std::vector tags, NFAStateType* dest_state) + NegativeTaggedTransition(std::vector tags, NFAStateType const* dest_state) : m_tags{[&tags] { if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) { throw std::invalid_argument("Tags cannot contain null elements"); From f94160720d593a7d62137a3e25ab17b704e5a7aa Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Tue, 19 Nov 2024 15:10:54 -0500 Subject: [PATCH 039/144] Use `auto`. Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 21691d4b..0a081b56 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -894,7 +894,7 @@ template template void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAStateType* end_state) const { - NFAStateType* root = nfa->get_root(); + auto* root = nfa->get_root(); auto* capture_group_start_state = nfa->new_state(); root->add_positive_tagged_start_transition(m_tag.get(), capture_group_start_state); From aad9eb39b2a2ad149385557b3203caf0ce165dff Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Tue, 19 Nov 2024 15:11:49 -0500 Subject: [PATCH 040/144] Fix spacing. 
Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/RegexAST.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 0a081b56..221c77be 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -902,7 +902,6 @@ void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAS = nfa->new_state_with_positive_tagged_transition(m_tag.get(), end_state); nfa->set_root(capture_group_start_state); m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, state_with_positive_tagged_transition); - nfa->set_root(root); } From a801bf89ba263e597939b587bf489d2f41befb4f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 19 Nov 2024 16:23:51 -0500 Subject: [PATCH 041/144] Add diagram for capture group NFA. --- src/log_surgeon/finite_automata/RegexAST.hpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 221c77be..4eb4a21e 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -894,14 +894,20 @@ template template void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAStateType* end_state) const { + // root --(pos_tagged_start_transition)--> capture_group_start_state --> + // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> + // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state auto* root = nfa->get_root(); auto* capture_group_start_state = nfa->new_state(); root->add_positive_tagged_start_transition(m_tag.get(), capture_group_start_state); - auto* state_with_positive_tagged_transition + auto* state_with_positive_tagged_end_transition = nfa->new_state_with_positive_tagged_transition(m_tag.get(), end_state); nfa->set_root(capture_group_start_state); - 
m_group_regex_ast->add_to_nfa_with_negative_tags( + nfa, + state_with_positive_tagged_end_transition + ); nfa->set_root(root); } From 08b7548b40cd2342ba61bc5e57d299d457dccb39 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 19 Nov 2024 16:27:42 -0500 Subject: [PATCH 042/144] Add const for consistency with constructor. --- src/log_surgeon/finite_automata/RegexNFAState.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index 61ab42c4..bf47011b 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -49,7 +49,8 @@ class RegexNFAState { return m_matching_variable_id; } - auto add_positive_tagged_start_transition(Tag const* tag, RegexNFAState* dest_state) -> void { + auto + add_positive_tagged_start_transition(Tag const* tag, RegexNFAState const* dest_state) -> void { m_positive_tagged_start_transitions.emplace_back(tag, dest_state); } From 449133e40fa019fdd284171d4fb2b6da655481d0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 19 Nov 2024 16:44:55 -0500 Subject: [PATCH 043/144] Update positive end transition to be optional instead of a vector.
--- src/log_surgeon/Lexer.tpp | 9 +++--- src/log_surgeon/finite_automata/RegexNFA.hpp | 11 ++++--- .../finite_automata/RegexNFAState.hpp | 30 +++++++++---------- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/log_surgeon/Lexer.tpp b/src/log_surgeon/Lexer.tpp index d72f11ef..45524ed3 100644 --- a/src/log_surgeon/Lexer.tpp +++ b/src/log_surgeon/Lexer.tpp @@ -410,11 +410,12 @@ auto Lexer::epsilon_closure(NFAStateType const* stat { stack.push(positive_tagged_start_transition.get_dest_state()); } - for (auto const& positive_tagged_end_transition : - current_state->get_positive_tagged_end_transitions()) - { - stack.push(positive_tagged_end_transition.get_dest_state()); + auto const& optional_positive_tagged_end_transition + = current_state->get_positive_tagged_end_transitions(); + if (optional_positive_tagged_end_transition.has_value()) { + stack.push(optional_positive_tagged_end_transition.value().get_dest_state()); } + auto const& optional_negative_tagged_transition = current_state->get_negative_tagged_transition(); if (optional_negative_tagged_transition.has_value()) { diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index c35aa83a..374542cd 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -152,11 +152,14 @@ auto RegexNFA::get_bfs_traversal_order() const -> std::vectorget_positive_tagged_end_transitions()) - { - add_to_queue_and_visited(positive_tagged_end_transition.get_dest_state()); + + auto const& optional_positive_tagged_end_transition + = current_state->get_positive_tagged_end_transitions(); + if (optional_positive_tagged_end_transition.has_value()) { + add_to_queue_and_visited(optional_positive_tagged_end_transition.value().get_dest_state( + )); } + auto const& optional_negative_tagged_transition = current_state->get_negative_tagged_transition(); if (optional_negative_tagged_transition.has_value()) { diff --git 
a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index bf47011b..5d440551 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -32,7 +32,7 @@ class RegexNFAState { RegexNFAState() = default; RegexNFAState(Tag const* tag, RegexNFAState const* dest_state) - : m_positive_tagged_end_transitions{{tag, dest_state}} {} + : m_positive_tagged_end_transition{PositiveTaggedTransition{tag, dest_state}} {} RegexNFAState(std::vector tags, RegexNFAState const* dest_state) : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {} @@ -60,8 +60,8 @@ class RegexNFAState { } [[nodiscard]] auto get_positive_tagged_end_transitions( - ) const -> std::vector> const& { - return m_positive_tagged_end_transitions; + ) const -> std::optional> const& { + return m_positive_tagged_end_transition; } [[nodiscard]] auto get_negative_tagged_transition( @@ -111,7 +111,7 @@ class RegexNFAState { bool m_accepting{false}; uint32_t m_matching_variable_id{0}; std::vector> m_positive_tagged_start_transitions; - std::vector> m_positive_tagged_end_transitions; + std::optional> m_positive_tagged_end_transition; std::optional> m_negative_tagged_transition; std::vector m_epsilon_transitions; std::array, cSizeOfByte> m_bytes_transitions; @@ -189,26 +189,24 @@ auto RegexNFAState::serialize( std::vector positive_tagged_start_transition_strings; for (auto const& positive_tagged_start_transition : m_positive_tagged_start_transitions) { - auto const optional_serialized_positive_transition + auto const optional_serialized_positive_start_transition = positive_tagged_start_transition.serialize(state_ids); - if (false == optional_serialized_positive_transition.has_value()) { + if (false == optional_serialized_positive_start_transition.has_value()) { return std::nullopt; } positive_tagged_start_transition_strings.emplace_back( - 
optional_serialized_positive_transition.value() + optional_serialized_positive_start_transition.value() ); } - std::vector positive_tagged_end_transition_strings; - for (auto const& positive_tagged_end_transition : m_positive_tagged_end_transitions) { - auto const optional_serialized_positive_transition - = positive_tagged_end_transition.serialize(state_ids); - if (false == optional_serialized_positive_transition.has_value()) { + std::string positive_tagged_end_transition_string; + if (m_positive_tagged_end_transition.has_value()) { + auto const optional_serialized_positive_end_transition + = m_positive_tagged_end_transition.value().serialize(state_ids); + if (false == optional_serialized_positive_end_transition.has_value()) { return std::nullopt; } - positive_tagged_end_transition_strings.emplace_back( - optional_serialized_positive_transition.value() - ); + positive_tagged_end_transition_string = optional_serialized_positive_end_transition.value(); } std::string negative_tagged_transition_string; @@ -233,7 +231,7 @@ auto RegexNFAState::serialize( fmt::join(byte_transitions, ","), fmt::join(epsilon_transitions, ","), fmt::join(positive_tagged_start_transition_strings, ","), - fmt::join(positive_tagged_end_transition_strings, ","), + positive_tagged_end_transition_string, negative_tagged_transition_string ); } From 7b837bf1580f2d51a5c3e9bb269dcfed838c2b7f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 19 Nov 2024 17:38:56 -0500 Subject: [PATCH 044/144] Rename new_state function correctly. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- src/log_surgeon/finite_automata/RegexNFA.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 4eb4a21e..48ea0313 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -902,7 +902,7 @@ void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAS root->add_positive_tagged_start_transition(m_tag.get(), capture_group_start_state); auto* state_with_positive_tagged_end_transition - = nfa->new_state_with_positive_tagged_transition(m_tag.get(), end_state); + = nfa->new_state_with_positive_tagged_end_transition(m_tag.get(), end_state); nfa->set_root(capture_group_start_state); m_group_regex_ast->add_to_nfa_with_negative_tags( nfa, diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 374542cd..7fb87d5b 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -41,7 +41,7 @@ class RegexNFA { * @param dest_state * @return NFAStateType* */ - [[nodiscard]] auto new_state_with_positive_tagged_transition( + [[nodiscard]] auto new_state_with_positive_tagged_end_transition( Tag const* tag, NFAStateType const* dest_state ) -> NFAStateType*; @@ -101,7 +101,7 @@ auto RegexNFA::new_state() -> NFAStateType* { } template -auto RegexNFA::new_state_with_positive_tagged_transition( +auto RegexNFA::new_state_with_positive_tagged_end_transition( Tag const* tag, NFAStateType const* dest_state ) -> NFAStateType* { From f0eb56b1f3d06fd51ec14844b8f6a25631293975 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 19 Nov 2024 17:50:32 -0500 Subject: [PATCH 045/144] Update capture group AST state creation. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 6 +++--- src/log_surgeon/finite_automata/RegexNFA.hpp | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 48ea0313..6e7dceeb 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -897,13 +897,13 @@ void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAS // root --(pos_tagged_start_transition)--> capture_group_start_state --> // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state + auto* capture_group_start_state = nfa->new_capture_group_start_state(m_tag.get()); + auto* root = nfa->get_root(); - auto* capture_group_start_state = nfa->new_state(); - root->add_positive_tagged_start_transition(m_tag.get(), capture_group_start_state); + nfa->set_root(capture_group_start_state); auto* state_with_positive_tagged_end_transition = nfa->new_state_with_positive_tagged_end_transition(m_tag.get(), end_state); - nfa->set_root(capture_group_start_state); m_group_regex_ast->add_to_nfa_with_negative_tags( nfa, state_with_positive_tagged_end_transition diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 7fb87d5b..fdb289ae 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -35,7 +35,7 @@ class RegexNFA { [[nodiscard]] auto new_state() -> NFAStateType*; /** - * Creates a unique_ptr for an NFA state with a positive tagged transition and adds it to + * Creates a unique_ptr for an NFA state with a positive tagged end transition and adds it to * `m_states`. 
* @param tag * @param dest_state @@ -46,6 +46,13 @@ class RegexNFA { NFAStateType const* dest_state ) -> NFAStateType*; + /** + * Add an NFA state with in incoming positive tagged start transition from `m_root`. + * @param tag + * @return NFAStateType* + */ + [[nodiscard]] auto new_capture_group_start_state(Tag const* tag) -> NFAStateType*; + /** * Creates a unique_ptr for an NFA state with a negative tagged transition and adds it to * `m_states`. @@ -100,6 +107,13 @@ auto RegexNFA::new_state() -> NFAStateType* { return m_states.back().get(); } +template +auto RegexNFA::new_capture_group_start_state(Tag const* tag) -> NFAStateType* { + auto* state = new_state(); + m_root->add_positive_tagged_start_transition(tag, state); + return state; +} + template auto RegexNFA::new_state_with_positive_tagged_end_transition( Tag const* tag, From a9459154a075e6ce73537e7db8028febf8413b66 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 19 Nov 2024 18:27:34 -0500 Subject: [PATCH 046/144] Encapsulate new state for capture group. --- src/log_surgeon/finite_automata/RegexAST.hpp | 28 ++++++-------- src/log_surgeon/finite_automata/RegexNFA.hpp | 40 +++++++++++++------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 6e7dceeb..440c3a89 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -693,11 +693,11 @@ class RegexASTCapture : public RegexAST { /** * Adds the needed `RegexNFA::states` to the passed in nfa to handle a - * `RegexASTCapture` before transitioning to an accepting `end_state`. + * `RegexASTCapture` before transitioning to a `dest_state`. 
* @param nfa - * @param end_state + * @param dest_state */ - auto add_to_nfa(RegexNFA* nfa, NFAStateType* end_state) const -> void override; + auto add_to_nfa(RegexNFA* nfa, NFAStateType* dest_state) const -> void override; [[nodiscard]] auto serialize() const -> std::u32string override; @@ -892,23 +892,19 @@ template } template -void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAStateType* end_state) - const { +void RegexASTCapture::add_to_nfa( + RegexNFA* nfa, + NFAStateType* dest_state +) const { // root --(pos_tagged_start_transition)--> capture_group_start_state --> // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state - auto* capture_group_start_state = nfa->new_capture_group_start_state(m_tag.get()); - - auto* root = nfa->get_root(); - nfa->set_root(capture_group_start_state); + auto [start_state, end_state] = nfa->new_capture_group_start_states(m_tag.get(), dest_state); - auto* state_with_positive_tagged_end_transition - = nfa->new_state_with_positive_tagged_end_transition(m_tag.get(), end_state); - m_group_regex_ast->add_to_nfa_with_negative_tags( - nfa, - state_with_positive_tagged_end_transition - ); - nfa->set_root(root); + auto* initial_root = nfa->get_root(); + nfa->set_root(start_state); + m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, end_state); + nfa->set_root(initial_root); } template diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index fdb289ae..95d18615 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -46,13 +46,6 @@ class RegexNFA { NFAStateType const* dest_state ) -> NFAStateType*; - /** - * Add an NFA state with in incoming positive tagged start transition from `m_root`. 
- * @param tag - * @return NFAStateType* - */ - [[nodiscard]] auto new_capture_group_start_state(Tag const* tag) -> NFAStateType*; - /** * Creates a unique_ptr for an NFA state with a negative tagged transition and adds it to * `m_states`. @@ -65,6 +58,19 @@ class RegexNFA { NFAStateType const* dest_state ) -> NFAStateType*; + /** + * Add two NFA states for a capture group: + * 1. A start state: `m_root` --(start `tag`)--> start_state. + * 2. An end state: end_state --(end `tag`)--> `dest_state`. + * @param tag + * @param dest_state + * @return std::pair + */ + [[nodiscard]] auto new_capture_group_start_states( + Tag const* tag, + NFAStateType const* dest_state + ) -> std::pair; + /** * @return A vector representing the traversal order of the NFA states using breadth-first * search (BFS). @@ -107,13 +113,6 @@ auto RegexNFA::new_state() -> NFAStateType* { return m_states.back().get(); } -template -auto RegexNFA::new_capture_group_start_state(Tag const* tag) -> NFAStateType* { - auto* state = new_state(); - m_root->add_positive_tagged_start_transition(tag, state); - return state; -} - template auto RegexNFA::new_state_with_positive_tagged_end_transition( Tag const* tag, @@ -132,6 +131,19 @@ auto RegexNFA::new_state_with_negative_tagged_transition( return m_states.back().get(); } +template +auto RegexNFA::new_capture_group_start_states( + Tag const* tag, + NFAStateType const* dest_state +) -> std::pair { + auto* start_state = new_state(); + m_root->add_positive_tagged_start_transition(tag, start_state); + + auto* end_state = new_state_with_positive_tagged_transition(tag, dest_state); + + return {start_state, end_state}; +} + template auto RegexNFA::get_bfs_traversal_order() const -> std::vector { std::queue state_queue; From c757deda4859a787908c1e899213de3fcb423699 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 19 Nov 2024 18:30:40 -0500 Subject: [PATCH 047/144] Fix compiler error. 
--- src/log_surgeon/finite_automata/RegexNFA.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 95d18615..e9638f12 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -139,7 +139,7 @@ auto RegexNFA::new_capture_group_start_states( auto* start_state = new_state(); m_root->add_positive_tagged_start_transition(tag, start_state); - auto* end_state = new_state_with_positive_tagged_transition(tag, dest_state); + auto* end_state = new_state_with_positive_tagged_end_transition(tag, dest_state); return {start_state, end_state}; } From 5bb19169d260b1d902d79510475b6898073a219c Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:29:20 -0500 Subject: [PATCH 048/144] ci: Add GH workflow to validate PR titles follow Conventional Commits. (#51) Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- .github/PULL_REQUEST_TEMPLATE.md | 10 ++++++++-- .github/workflows/pr-title-checks.yaml | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/pr-title-checks.yaml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9672f6da..6b6cd51d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,5 +1,11 @@ -# References - + # Description diff --git a/.github/workflows/pr-title-checks.yaml b/.github/workflows/pr-title-checks.yaml new file mode 100644 index 00000000..1d65f1e0 --- /dev/null +++ b/.github/workflows/pr-title-checks.yaml @@ -0,0 +1,25 @@ +name: "pr-title-checks" + +on: + pull_request_target: + types: ["edited", "opened", "reopened"] + branches: ["main"] + +permissions: {} + +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + + # Cancel in-progress jobs for efficiency + 
cancel-in-progress: true + +jobs: + conventional-commits: + permissions: + # For amannn/action-semantic-pull-request + pull-requests: "read" + runs-on: "ubuntu-latest" + steps: + - uses: "amannn/action-semantic-pull-request@v5" + env: + GITHUB_TOKEN: "${{secrets.GITHUB_TOKEN}}" From 2eb74772b267aabd72782dd7bd193418088ceb97 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 04:31:47 -0500 Subject: [PATCH 049/144] Use singular for end transition getter function. --- src/log_surgeon/Lexer.tpp | 2 +- src/log_surgeon/finite_automata/RegexNFA.hpp | 2 +- src/log_surgeon/finite_automata/RegexNFAState.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/Lexer.tpp b/src/log_surgeon/Lexer.tpp index 45524ed3..8a8aeb33 100644 --- a/src/log_surgeon/Lexer.tpp +++ b/src/log_surgeon/Lexer.tpp @@ -411,7 +411,7 @@ auto Lexer::epsilon_closure(NFAStateType const* stat stack.push(positive_tagged_start_transition.get_dest_state()); } auto const& optional_positive_tagged_end_transition - = current_state->get_positive_tagged_end_transitions(); + = current_state->get_positive_tagged_end_transition(); if (optional_positive_tagged_end_transition.has_value()) { stack.push(optional_positive_tagged_end_transition.value().get_dest_state()); } diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index e9638f12..1dbd8810 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -180,7 +180,7 @@ auto RegexNFA::get_bfs_traversal_order() const -> std::vectorget_positive_tagged_end_transitions(); + = current_state->get_positive_tagged_end_transition(); if (optional_positive_tagged_end_transition.has_value()) { add_to_queue_and_visited(optional_positive_tagged_end_transition.value().get_dest_state( )); diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index 5d440551..a28d35a5 100644 
--- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -59,7 +59,7 @@ class RegexNFAState { return m_positive_tagged_start_transitions; } - [[nodiscard]] auto get_positive_tagged_end_transitions( + [[nodiscard]] auto get_positive_tagged_end_transition( ) const -> std::optional> const& { return m_positive_tagged_end_transition; } From 08060ed2c241c9d9a91fe73ab300a7303385a5a7 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Wed, 20 Nov 2024 05:09:26 -0500 Subject: [PATCH 050/144] Void to auto -> void. Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/RegexAST.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 440c3a89..56ff2123 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -892,10 +892,10 @@ template } template -void RegexASTCapture::add_to_nfa( +auto RegexASTCapture::add_to_nfa( RegexNFA* nfa, NFAStateType* dest_state -) const { +) const -> void { // root --(pos_tagged_start_transition)--> capture_group_start_state --> // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state From 0c2c1d1df227c188200fb1c35ad6ebbed2549df6 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 05:16:05 -0500 Subject: [PATCH 051/144] Update new_capture_group_start_states to new_capture_group_states to reflect functionality change. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- src/log_surgeon/finite_automata/RegexNFA.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 56ff2123..75d1b220 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -899,7 +899,7 @@ auto RegexASTCapture::add_to_nfa( // root --(pos_tagged_start_transition)--> capture_group_start_state --> // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state - auto [start_state, end_state] = nfa->new_capture_group_start_states(m_tag.get(), dest_state); + auto [start_state, end_state] = nfa->new_capture_group_states(m_tag.get(), dest_state); auto* initial_root = nfa->get_root(); nfa->set_root(start_state); diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 1dbd8810..0fd2b9af 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -66,7 +66,7 @@ class RegexNFA { * @param dest_state * @return std::pair */ - [[nodiscard]] auto new_capture_group_start_states( + [[nodiscard]] auto new_capture_group_states( Tag const* tag, NFAStateType const* dest_state ) -> std::pair; @@ -132,7 +132,7 @@ auto RegexNFA::new_state_with_negative_tagged_transition( } template -auto RegexNFA::new_capture_group_start_states( +auto RegexNFA::new_capture_group_states( Tag const* tag, NFAStateType const* dest_state ) -> std::pair { From b0b951a57ad218a1261c15093cac1d6c701b9506 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 05:17:17 -0500 Subject: [PATCH 052/144] Linter. 
--- src/log_surgeon/finite_automata/RegexNFA.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 0fd2b9af..05ba6cb9 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -66,10 +66,8 @@ class RegexNFA { * @param dest_state * @return std::pair */ - [[nodiscard]] auto new_capture_group_states( - Tag const* tag, - NFAStateType const* dest_state - ) -> std::pair; + [[nodiscard]] auto new_capture_group_states(Tag const* tag, NFAStateType const* dest_state) + -> std::pair; /** * @return A vector representing the traversal order of the NFA states using breadth-first From 3c2a2abde7ee23b29e0c7b471bbcee9ee4c7c4c3 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 15:11:45 -0500 Subject: [PATCH 053/144] Update docstring for . --- src/log_surgeon/finite_automata/RegexNFA.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 05ba6cb9..74847c04 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -59,12 +59,12 @@ class RegexNFA { ) -> NFAStateType*; /** - * Add two NFA states for a capture group: - * 1. A start state: `m_root` --(start `tag`)--> start_state. - * 2. An end state: end_state --(end `tag`)--> `dest_state`. - * @param tag + * Creates the start and end states for a capture group. + * @param tag The tag associated with the capture group. * @param dest_state - * @return std::pair + * @return A pair of states: + * - A new state with a positive tagged start transition from `m_root`. + * - A new state with a positive tagged end transition to `dest_state`. 
*/ [[nodiscard]] auto new_capture_group_states(Tag const* tag, NFAStateType const* dest_state) -> std::pair; @@ -138,7 +138,6 @@ auto RegexNFA::new_capture_group_states( m_root->add_positive_tagged_start_transition(tag, start_state); auto* end_state = new_state_with_positive_tagged_end_transition(tag, dest_state); - return {start_state, end_state}; } From 98c5b95db954405945794701213d5d37a90dac10 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 16:53:04 -0500 Subject: [PATCH 054/144] Rename to new_start_and_end_states_with_positively_tagged_transitions. --- src/log_surgeon/finite_automata/RegexAST.hpp | 6 +++++- src/log_surgeon/finite_automata/RegexNFA.hpp | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 75d1b220..6247c558 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -899,7 +899,11 @@ auto RegexASTCapture::add_to_nfa( // root --(pos_tagged_start_transition)--> capture_group_start_state --> // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state - auto [start_state, end_state] = nfa->new_capture_group_states(m_tag.get(), dest_state); + auto [start_state, end_state] + = nfa->new_start_and_end_states_with_positively_tagged_transitions( + m_tag.get(), + dest_state + ); auto* initial_root = nfa->get_root(); nfa->set_root(start_state); diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 74847c04..0e425a64 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -66,8 +66,10 @@ class RegexNFA { * - A new state with a positive tagged start transition from `m_root`. * - A new state with a positive tagged end transition to `dest_state`. 
*/ - [[nodiscard]] auto new_capture_group_states(Tag const* tag, NFAStateType const* dest_state) - -> std::pair; + [[nodiscard]] auto new_start_and_end_states_with_positively_tagged_transitions( + Tag const* tag, + NFAStateType const* dest_state + ) -> std::pair; /** * @return A vector representing the traversal order of the NFA states using breadth-first @@ -130,7 +132,7 @@ auto RegexNFA::new_state_with_negative_tagged_transition( } template -auto RegexNFA::new_capture_group_states( +auto RegexNFA::new_start_and_end_states_with_positively_tagged_transitions( Tag const* tag, NFAStateType const* dest_state ) -> std::pair { From f59cf41ccf2b51fbadd8b47a18e2b16532d2d3f0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 17:06:43 -0500 Subject: [PATCH 055/144] Rename to capture_X_state. --- src/log_surgeon/finite_automata/RegexAST.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 6247c558..3b432e96 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -899,15 +899,15 @@ auto RegexASTCapture::add_to_nfa( // root --(pos_tagged_start_transition)--> capture_group_start_state --> // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state - auto [start_state, end_state] + auto [capture_start_state, capture_end_state] = nfa->new_start_and_end_states_with_positively_tagged_transitions( m_tag.get(), dest_state ); auto* initial_root = nfa->get_root(); - nfa->set_root(start_state); - m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, end_state); + nfa->set_root(capture_start_state); + m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, capture_end_state); nfa->set_root(initial_root); } From 85a2d69de00aee0bc8b009b658b879636cd8433b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: 
Wed, 20 Nov 2024 17:16:03 -0500 Subject: [PATCH 056/144] Update docstring. --- src/log_surgeon/finite_automata/RegexNFA.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 0e425a64..7f6ebbaf 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -39,7 +39,7 @@ class RegexNFA { * `m_states`. * @param tag * @param dest_state - * @return NFAStateType* + * @return A new state with a positive tagged end transition to `dest_state`. */ [[nodiscard]] auto new_state_with_positive_tagged_end_transition( Tag const* tag, From 4c602d485027a7c03b912dc7ffe0b87b18024cf4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 17:52:20 -0500 Subject: [PATCH 057/144] Updated diagram to match vars used in code. --- src/log_surgeon/finite_automata/RegexAST.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index 3b432e96..b9a8329e 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -896,9 +896,8 @@ auto RegexASTCapture::add_to_nfa( RegexNFA* nfa, NFAStateType* dest_state ) const -> void { - // root --(pos_tagged_start_transition)--> capture_group_start_state --> - // [inner capture group NFA] --(neg_tagged_transition)--> neg_state --> - // state_with_positive_tagged_end_transition --(pos_tagged_end_transition)--> end_state + // root --(`m_tag` start)--> capture_start_state --> [`m_group_regex_ast` NFA] + // --(`m_negative_tags`)--> capture_end_state --(`m_tag` end)--> dest_state auto [capture_start_state, capture_end_state] = nfa->new_start_and_end_states_with_positively_tagged_transitions( m_tag.get(), From 2b0143334e5d00f74c7ef3d8f8efe00f91be2774 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 17:56:25 -0500 
Subject: [PATCH 058/144] Rename vars to serialized_X. --- src/log_surgeon/finite_automata/RegexNFAState.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index a28d35a5..0fd59c50 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -187,26 +187,26 @@ auto RegexNFAState::serialize( epsilon_transitions.emplace_back(std::to_string(state_ids.at(dest_state))); } - std::vector positive_tagged_start_transition_strings; + std::vector serialized_positive_tagged_start_transitions; for (auto const& positive_tagged_start_transition : m_positive_tagged_start_transitions) { auto const optional_serialized_positive_start_transition = positive_tagged_start_transition.serialize(state_ids); if (false == optional_serialized_positive_start_transition.has_value()) { return std::nullopt; } - positive_tagged_start_transition_strings.emplace_back( + serialized_positive_tagged_start_transitions.emplace_back( optional_serialized_positive_start_transition.value() ); } - std::string positive_tagged_end_transition_string; + std::string serialized_positive_tagged_end_transition; if (m_positive_tagged_end_transition.has_value()) { auto const optional_serialized_positive_end_transition = m_positive_tagged_end_transition.value().serialize(state_ids); if (false == optional_serialized_positive_end_transition.has_value()) { return std::nullopt; } - positive_tagged_end_transition_string = optional_serialized_positive_end_transition.value(); + serialized_positive_tagged_end_transition = optional_serialized_positive_end_transition.value(); } std::string negative_tagged_transition_string; @@ -230,8 +230,8 @@ auto RegexNFAState::serialize( accepting_tag_string, fmt::join(byte_transitions, ","), fmt::join(epsilon_transitions, ","), - fmt::join(positive_tagged_start_transition_strings, ","), - 
positive_tagged_end_transition_string, + fmt::join(serialized_positive_tagged_start_transitions, ","), + serialized_positive_tagged_end_transition, negative_tagged_transition_string ); } From e37b29a33e685fa7b7c8bba6df9b139b03f4c932 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 18:05:50 -0500 Subject: [PATCH 059/144] Run Linter. --- src/log_surgeon/finite_automata/RegexNFAState.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index 0fd59c50..8fce8cf7 100644 --- a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -206,7 +206,8 @@ auto RegexNFAState::serialize( if (false == optional_serialized_positive_end_transition.has_value()) { return std::nullopt; } - serialized_positive_tagged_end_transition = optional_serialized_positive_end_transition.value(); + serialized_positive_tagged_end_transition + = optional_serialized_positive_end_transition.value(); } std::string negative_tagged_transition_string; From c5beca321728c9a3885652450f3ca1183d105693 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 18:20:20 -0500 Subject: [PATCH 060/144] Fix typo. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 2 +- src/log_surgeon/finite_automata/RegexNFA.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index b9a8329e..ef55071f 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -899,7 +899,7 @@ auto RegexASTCapture::add_to_nfa( // root --(`m_tag` start)--> capture_start_state --> [`m_group_regex_ast` NFA] // --(`m_negative_tags`)--> capture_end_state --(`m_tag` end)--> dest_state auto [capture_start_state, capture_end_state] - = nfa->new_start_and_end_states_with_positively_tagged_transitions( + = nfa->new_start_and_end_states_with_positive_tagged_transitions( m_tag.get(), dest_state ); diff --git a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 7f6ebbaf..ba9791b1 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -66,7 +66,7 @@ class RegexNFA { * - A new state with a positive tagged start transition from `m_root`. * - A new state with a positive tagged end transition to `dest_state`. */ - [[nodiscard]] auto new_start_and_end_states_with_positively_tagged_transitions( + [[nodiscard]] auto new_start_and_end_states_with_positive_tagged_transitions( Tag const* tag, NFAStateType const* dest_state ) -> std::pair; @@ -132,7 +132,7 @@ auto RegexNFA::new_state_with_negative_tagged_transition( } template -auto RegexNFA::new_start_and_end_states_with_positively_tagged_transitions( +auto RegexNFA::new_start_and_end_states_with_positive_tagged_transitions( Tag const* tag, NFAStateType const* dest_state ) -> std::pair { From fe4a7b33b378b1b07c1743fd3dcdc871303670d1 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 20 Nov 2024 18:51:09 -0500 Subject: [PATCH 061/144] Update diagram for capture group NFA. 
--- src/log_surgeon/finite_automata/RegexAST.hpp | 33 ++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index ef55071f..beeb588e 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -896,8 +896,37 @@ auto RegexASTCapture::add_to_nfa( RegexNFA* nfa, NFAStateType* dest_state ) const -> void { - // root --(`m_tag` start)--> capture_start_state --> [`m_group_regex_ast` NFA] - // --(`m_negative_tags`)--> capture_end_state --(`m_tag` end)--> dest_state + // TODO: move this into a documentation file in the future, and reference it here. + // The NFA constructed for a capture group follows the structure below, with tagged transitions + // explicitly labeled for clarity: + // +---------------------+ + // | `m_root` | + // +---------------------+ + // | `m_tag` start + // | (positive tagged start transition) + // v + // +---------------------+ + // |`capture_start_state`| + // +---------------------+ + // | + // | (epsilon transition) + // v + // +---------------------+ + // | `m_group_regex_ast` | + // | (nested NFA) | + // +---------------------+ + // | `m_negative_tags` + // | (negative tagged transition) + // v + // +---------------------+ + // | `capture_end_state` | + // +---------------------+ + // | `m_tag` end + // | (positive tagged end transition) + // v + // +---------------------+ + // | `dest_state` | + // +---------------------+ auto [capture_start_state, capture_end_state] = nfa->new_start_and_end_states_with_positive_tagged_transitions( m_tag.get(), From 3f1322441d773e35833d57055a616e0040203cfd Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Wed, 20 Nov 2024 19:18:07 -0500 Subject: [PATCH 062/144] feat: Split NFA positive tags into start and end transitions to encapsulate a capture group. 
(#50) Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/Lexer.tpp | 12 ++- src/log_surgeon/finite_automata/RegexAST.hpp | 56 ++++++++++-- src/log_surgeon/finite_automata/RegexNFA.hpp | 47 ++++++++-- .../finite_automata/RegexNFAState.hpp | 52 +++++++++--- tests/test-NFA.cpp | 85 +++++++++++++------ 5 files changed, 195 insertions(+), 57 deletions(-) diff --git a/src/log_surgeon/Lexer.tpp b/src/log_surgeon/Lexer.tpp index c7dab9db..8a8aeb33 100644 --- a/src/log_surgeon/Lexer.tpp +++ b/src/log_surgeon/Lexer.tpp @@ -405,11 +405,17 @@ auto Lexer::epsilon_closure(NFAStateType const* stat } // TODO: currently treat tagged transitions as epsilon transitions - for (auto const& positive_tagged_transition : - current_state->get_positive_tagged_transitions()) + for (auto const& positive_tagged_start_transition : + current_state->get_positive_tagged_start_transitions()) { - stack.push(positive_tagged_transition.get_dest_state()); + stack.push(positive_tagged_start_transition.get_dest_state()); } + auto const& optional_positive_tagged_end_transition + = current_state->get_positive_tagged_end_transition(); + if (optional_positive_tagged_end_transition.has_value()) { + stack.push(optional_positive_tagged_end_transition.value().get_dest_state()); + } + auto const& optional_negative_tagged_transition = current_state->get_negative_tagged_transition(); if (optional_negative_tagged_transition.has_value()) { diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index c0c6b04f..beeb588e 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -693,11 +693,11 @@ class RegexASTCapture : public RegexAST { /** * Adds the needed `RegexNFA::states` to the passed in nfa to handle a - * `RegexASTCapture` before transitioning to an accepting `end_state`. + * `RegexASTCapture` before transitioning to a `dest_state`. 
* @param nfa - * @param end_state + * @param dest_state */ - auto add_to_nfa(RegexNFA* nfa, NFAStateType* end_state) const -> void override; + auto add_to_nfa(RegexNFA* nfa, NFAStateType* dest_state) const -> void override; [[nodiscard]] auto serialize() const -> std::u32string override; @@ -892,11 +892,51 @@ template } template -void RegexASTCapture::add_to_nfa(RegexNFA* nfa, NFAStateType* end_state) - const { - auto* state_with_positive_tagged_transition - = nfa->new_state_with_positive_tagged_transition(m_tag.get(), end_state); - m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, state_with_positive_tagged_transition); +auto RegexASTCapture::add_to_nfa( + RegexNFA* nfa, + NFAStateType* dest_state +) const -> void { + // TODO: move this into a documentation file in the future, and reference it here. + // The NFA constructed for a capture group follows the structure below, with tagged transitions + // explicitly labeled for clarity: + // +---------------------+ + // | `m_root` | + // +---------------------+ + // | `m_tag` start + // | (positive tagged start transition) + // v + // +---------------------+ + // |`capture_start_state`| + // +---------------------+ + // | + // | (epsilon transition) + // v + // +---------------------+ + // | `m_group_regex_ast` | + // | (nested NFA) | + // +---------------------+ + // | `m_negative_tags` + // | (negative tagged transition) + // v + // +---------------------+ + // | `capture_end_state` | + // +---------------------+ + // | `m_tag` end + // | (positive tagged end transition) + // v + // +---------------------+ + // | `dest_state` | + // +---------------------+ + auto [capture_start_state, capture_end_state] + = nfa->new_start_and_end_states_with_positive_tagged_transitions( + m_tag.get(), + dest_state + ); + + auto* initial_root = nfa->get_root(); + nfa->set_root(capture_start_state); + m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, capture_end_state); + nfa->set_root(initial_root); } template diff --git 
a/src/log_surgeon/finite_automata/RegexNFA.hpp b/src/log_surgeon/finite_automata/RegexNFA.hpp index 7919a0c6..ba9791b1 100644 --- a/src/log_surgeon/finite_automata/RegexNFA.hpp +++ b/src/log_surgeon/finite_automata/RegexNFA.hpp @@ -35,13 +35,13 @@ class RegexNFA { [[nodiscard]] auto new_state() -> NFAStateType*; /** - * Creates a unique_ptr for an NFA state with a positive tagged transition and adds it to + * Creates a unique_ptr for an NFA state with a positive tagged end transition and adds it to * `m_states`. * @param tag * @param dest_state - * @return NFAStateType* + * @return A new state with a positive tagged end transition to `dest_state`. */ - [[nodiscard]] auto new_state_with_positive_tagged_transition( + [[nodiscard]] auto new_state_with_positive_tagged_end_transition( Tag const* tag, NFAStateType const* dest_state ) -> NFAStateType*; @@ -58,6 +58,19 @@ class RegexNFA { NFAStateType const* dest_state ) -> NFAStateType*; + /** + * Creates the start and end states for a capture group. + * @param tag The tag associated with the capture group. + * @param dest_state + * @return A pair of states: + * - A new state with a positive tagged start transition from `m_root`. + * - A new state with a positive tagged end transition to `dest_state`. + */ + [[nodiscard]] auto new_start_and_end_states_with_positive_tagged_transitions( + Tag const* tag, + NFAStateType const* dest_state + ) -> std::pair; + /** * @return A vector representing the traversal order of the NFA states using breadth-first * search (BFS). 
@@ -101,7 +114,7 @@ auto RegexNFA::new_state() -> NFAStateType* { } template -auto RegexNFA::new_state_with_positive_tagged_transition( +auto RegexNFA::new_state_with_positive_tagged_end_transition( Tag const* tag, NFAStateType const* dest_state ) -> NFAStateType* { @@ -118,6 +131,18 @@ auto RegexNFA::new_state_with_negative_tagged_transition( return m_states.back().get(); } +template +auto RegexNFA::new_start_and_end_states_with_positive_tagged_transitions( + Tag const* tag, + NFAStateType const* dest_state +) -> std::pair { + auto* start_state = new_state(); + m_root->add_positive_tagged_start_transition(tag, start_state); + + auto* end_state = new_state_with_positive_tagged_end_transition(tag, dest_state); + return {start_state, end_state}; +} + template auto RegexNFA::get_bfs_traversal_order() const -> std::vector { std::queue state_queue; @@ -147,11 +172,19 @@ auto RegexNFA::get_bfs_traversal_order() const -> std::vectorget_epsilon_transitions()) { add_to_queue_and_visited(dest_state); } - for (auto const& positive_tagged_transition : - current_state->get_positive_tagged_transitions()) + for (auto const& positive_tagged_start_transition : + current_state->get_positive_tagged_start_transitions()) { - add_to_queue_and_visited(positive_tagged_transition.get_dest_state()); + add_to_queue_and_visited(positive_tagged_start_transition.get_dest_state()); + } + + auto const& optional_positive_tagged_end_transition + = current_state->get_positive_tagged_end_transition(); + if (optional_positive_tagged_end_transition.has_value()) { + add_to_queue_and_visited(optional_positive_tagged_end_transition.value().get_dest_state( + )); } + auto const& optional_negative_tagged_transition = current_state->get_negative_tagged_transition(); if (optional_negative_tagged_transition.has_value()) { diff --git a/src/log_surgeon/finite_automata/RegexNFAState.hpp b/src/log_surgeon/finite_automata/RegexNFAState.hpp index dd21557b..8fce8cf7 100644 --- 
a/src/log_surgeon/finite_automata/RegexNFAState.hpp +++ b/src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -32,7 +32,7 @@ class RegexNFAState { RegexNFAState() = default; RegexNFAState(Tag const* tag, RegexNFAState const* dest_state) - : m_positive_tagged_transitions{{tag, dest_state}} {} + : m_positive_tagged_end_transition{PositiveTaggedTransition{tag, dest_state}} {} RegexNFAState(std::vector tags, RegexNFAState const* dest_state) : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {} @@ -49,9 +49,19 @@ class RegexNFAState { return m_matching_variable_id; } - [[nodiscard]] auto get_positive_tagged_transitions( + auto + add_positive_tagged_start_transition(Tag const* tag, RegexNFAState const* dest_state) -> void { + m_positive_tagged_start_transitions.emplace_back(tag, dest_state); + } + + [[nodiscard]] auto get_positive_tagged_start_transitions( ) const -> std::vector> const& { - return m_positive_tagged_transitions; + return m_positive_tagged_start_transitions; + } + + [[nodiscard]] auto get_positive_tagged_end_transition( + ) const -> std::optional> const& { + return m_positive_tagged_end_transition; } [[nodiscard]] auto get_negative_tagged_transition( @@ -100,7 +110,8 @@ class RegexNFAState { private: bool m_accepting{false}; uint32_t m_matching_variable_id{0}; - std::vector> m_positive_tagged_transitions; + std::vector> m_positive_tagged_start_transitions; + std::optional> m_positive_tagged_end_transition; std::optional> m_negative_tagged_transition; std::vector m_epsilon_transitions; std::array, cSizeOfByte> m_bytes_transitions; @@ -176,14 +187,27 @@ auto RegexNFAState::serialize( epsilon_transitions.emplace_back(std::to_string(state_ids.at(dest_state))); } - std::vector positive_tagged_transitions; - for (auto const& positive_tagged_transition : m_positive_tagged_transitions) { - auto const optional_serialized_positive_transition - = positive_tagged_transition.serialize(state_ids); - if (false == 
optional_serialized_positive_transition.has_value()) { + std::vector serialized_positive_tagged_start_transitions; + for (auto const& positive_tagged_start_transition : m_positive_tagged_start_transitions) { + auto const optional_serialized_positive_start_transition + = positive_tagged_start_transition.serialize(state_ids); + if (false == optional_serialized_positive_start_transition.has_value()) { + return std::nullopt; + } + serialized_positive_tagged_start_transitions.emplace_back( + optional_serialized_positive_start_transition.value() + ); + } + + std::string serialized_positive_tagged_end_transition; + if (m_positive_tagged_end_transition.has_value()) { + auto const optional_serialized_positive_end_transition + = m_positive_tagged_end_transition.value().serialize(state_ids); + if (false == optional_serialized_positive_end_transition.has_value()) { return std::nullopt; } - positive_tagged_transitions.emplace_back(optional_serialized_positive_transition.value()); + serialized_positive_tagged_end_transition + = optional_serialized_positive_end_transition.value(); } std::string negative_tagged_transition_string; @@ -200,13 +224,15 @@ auto RegexNFAState::serialize( = m_accepting ? 
fmt::format("accepting_tag={},", m_matching_variable_id) : ""; return fmt::format( - "{}:{}byte_transitions={{{}}},epsilon_transitions={{{}}},positive_tagged_transitions={{" - "{}}},negative_tagged_transition={{{}}}", + "{}:{}byte_transitions={{{}}},epsilon_transitions={{{}}},positive_tagged_start_" + "transitions={{{}}},positive_tagged_end_transitions={{{}}},negative_tagged_transition={" + "{{}}}", state_ids.at(this), accepting_tag_string, fmt::join(byte_transitions, ","), fmt::join(epsilon_transitions, ","), - fmt::join(positive_tagged_transitions, ","), + fmt::join(serialized_positive_tagged_start_transitions, ","), + serialized_positive_tagged_end_transition, negative_tagged_transition_string ); } diff --git a/tests/test-NFA.cpp b/tests/test-NFA.cpp index c7a599b2..6a92f4bb 100644 --- a/tests/test-NFA.cpp +++ b/tests/test-NFA.cpp @@ -49,58 +49,91 @@ TEST_CASE("Test NFA", "[NFA]") { // Compare against expected output string expected_serialized_nfa = "0:byte_transitions={A-->1,Z-->2}," "epsilon_transitions={}," - "positive_tagged_transitions={}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "1:byte_transitions={a-->3,b-->3,c-->4,d-->4}," + expected_serialized_nfa += "1:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={}," + "positive_tagged_start_transitions={3[letter]}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; expected_serialized_nfa += "2:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={}," - "negative_tagged_transition={5[letter1,letter2,letter,containerID]}\n"; + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," + "negative_tagged_transition={4[letter1,letter2,letter,containerID]}\n"; expected_serialized_nfa += "3:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={6[letter1]}," + 
"positive_tagged_start_transitions={5[letter1],6[letter2]}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "4:byte_transitions={}," + expected_serialized_nfa += "4:accepting_tag=0,byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={7[letter2]}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "5:accepting_tag=0,byte_transitions={}," + expected_serialized_nfa += "5:byte_transitions={a-->7,b-->7}," "epsilon_transitions={}," - "positive_tagged_transitions={}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "6:byte_transitions={}," + expected_serialized_nfa += "6:byte_transitions={c-->8,d-->8}," "epsilon_transitions={}," - "positive_tagged_transitions={}," - "negative_tagged_transition={8[letter2]}\n"; + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," + "negative_tagged_transition={}\n"; expected_serialized_nfa += "7:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={}," - "negative_tagged_transition={8[letter1]}\n"; + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={9[letter1]}," + "negative_tagged_transition={}\n"; expected_serialized_nfa += "8:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={9[letter]}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={10[letter2]}," + "negative_tagged_transition={}\n"; + expected_serialized_nfa += "9:byte_transitions={}," + "epsilon_transitions={}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," + "negative_tagged_transition={11[letter2]}\n"; + expected_serialized_nfa += "10:byte_transitions={}," + "epsilon_transitions={}," + "positive_tagged_start_transitions={}," + 
"positive_tagged_end_transitions={}," + "negative_tagged_transition={11[letter1]}\n"; + expected_serialized_nfa += "11:byte_transitions={}," + "epsilon_transitions={}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={12[letter]}," + "negative_tagged_transition={}\n"; + expected_serialized_nfa += "12:byte_transitions={B-->13}," + "epsilon_transitions={}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "9:byte_transitions={B-->10}," + expected_serialized_nfa += "13:byte_transitions={}," "epsilon_transitions={}," - "positive_tagged_transitions={}," + "positive_tagged_start_transitions={14[containerID]}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "10:byte_transitions={0-->11,1-->11,2-->11,3-->11,4-->11,5-->11,6-->" - "11,7-->11,8-->11,9-->11}," + expected_serialized_nfa += "14:byte_transitions={0-->15,1-->15,2-->15,3-->15,4-->15,5-->15,6-->" + "15,7-->15,8-->15,9-->15}," "epsilon_transitions={}," - "positive_tagged_transitions={}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "11:byte_transitions={0-->11,1-->11,2-->11,3-->11,4-->11,5-->11,6-->" - "11,7-->11,8-->11,9-->11}," + expected_serialized_nfa += "15:byte_transitions={0-->15,1-->15,2-->15,3-->15,4-->15,5-->15,6-->" + "15,7-->15,8-->15,9-->15}," "epsilon_transitions={}," - "positive_tagged_transitions={12[containerID]}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={16[containerID]}," "negative_tagged_transition={}\n"; - expected_serialized_nfa += "12:byte_transitions={C-->5}," + expected_serialized_nfa += "16:byte_transitions={C-->4}," "epsilon_transitions={}," - "positive_tagged_transitions={}," + "positive_tagged_start_transitions={}," + "positive_tagged_end_transitions={}," 
"negative_tagged_transition={}\n"; // Compare expected and actual line-by-line From 0017512c882a3289ce842100d84b4362792a60a7 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Nov 2024 09:45:56 -0500 Subject: [PATCH 063/144] Add register unit-tests, add PrefixTree with unit-tests. --- CMakeLists.txt | 2 + .../finite_automata/PrefixTree.cpp | 21 ++++++ .../finite_automata/PrefixTree.hpp | 66 +++++++++++++++++++ src/log_surgeon/finite_automata/Register.hpp | 7 +- tests/CMakeLists.txt | 4 +- tests/test-prefix-tree.cpp | 36 ++++++++++ tests/test-register.cpp | 19 ++++++ 7 files changed, 150 insertions(+), 5 deletions(-) create mode 100644 src/log_surgeon/finite_automata/PrefixTree.cpp create mode 100644 src/log_surgeon/finite_automata/PrefixTree.hpp create mode 100644 tests/test-prefix-tree.cpp create mode 100644 tests/test-register.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 20326d33..93f59208 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,8 @@ set(SOURCE_FILES src/log_surgeon/SchemaParser.hpp src/log_surgeon/Token.cpp src/log_surgeon/Token.hpp + src/log_surgeon/finite_automata/PrefixTree.cpp + src/log_surgeon/finite_automata/PrefixTree.hpp src/log_surgeon/finite_automata/RegexAST.hpp src/log_surgeon/finite_automata/RegexDFA.hpp src/log_surgeon/finite_automata/RegexDFA.tpp diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp new file mode 100644 index 00000000..6b39b1bf --- /dev/null +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -0,0 +1,21 @@ +#include "PrefixTree.hpp" + +#include + +namespace log_surgeon::finite_automata { +[[nodiscard]] auto PrefixTree::get_reversed_positions(uint32_t const index +) const -> std::vector { + if (m_nodes.size() <= index) { + throw std::invalid_argument("Prefix tree index out-of-bounds."); + } + + std::vector reversed_positions; + auto current_index = index; + while(0 < current_index) { + auto const& current_node = 
m_nodes[current_index]; + reversed_positions.push_back(current_node.get_position()); + current_index = current_node.get_predecessor_index(); + } + return reversed_positions; +} +} // namespace log_surgeon::finite_automata diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp new file mode 100644 index 00000000..dd6b1229 --- /dev/null +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -0,0 +1,66 @@ +#ifndef LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE +#define LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE + +#include +#include +#include +#include + +namespace log_surgeon::finite_automata { + +/** + * A prefix tree node helps a register represent a tag by storing the current position where a tag + * was matched in the lexxed string, as well as the index of the prefix tree node that stores the + * previous time the tag was matched. + * + * Note: m_position is -1 when a tag is + * unmatched. + */ +class PrefixTreeNode { +public: + PrefixTreeNode(uint32_t const predecessor_index, int32_t const position) + : m_predecessor_index(predecessor_index), + m_position(position) {} + + [[nodiscard]] auto get_predecessor_index() const -> uint32_t { return m_predecessor_index; } + + [[nodiscard]] auto get_position() const -> int32_t { return m_position; } + +private: + uint32_t m_predecessor_index; + int32_t m_position; +}; + +/** + * A prefix tree structure to store positions associated with registers. + * + * PrefixTree stores positions at nodes, and each node can represent a part of a position. + * Multiple positions can be stored at each index in the tree. The tree allows for the addition of + * positions and the retrieval of positions by their associated index. + */ +class PrefixTree { +public: + PrefixTree() : m_nodes{{0, -1}} {} + + /** + * @return The index of the newly inserted node in the tree. 
+ */ + uint32_t insert(uint32_t const predecessor_index, int32_t const position) { + m_nodes.emplace_back(predecessor_index, position); + return m_nodes.size() - 1; + } + + /** + * @param index Representing the leaf node of the register's sub-tree. + * @return The positions, in reverse order, at which the register places the tag in the + * lexed string. + */ + [[nodiscard]] auto get_reversed_positions(uint32_t index) const -> std::vector; + +private: + std::vector m_nodes; +}; + +} // namespace log_surgeon::finite_automata + +#endif // LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE diff --git a/src/log_surgeon/finite_automata/Register.hpp b/src/log_surgeon/finite_automata/Register.hpp index d0be4f15..fddb28eb 100644 --- a/src/log_surgeon/finite_automata/Register.hpp +++ b/src/log_surgeon/finite_automata/Register.hpp @@ -2,7 +2,8 @@ #define LOG_SURGEON_FINITE_AUTOMATA_REGISTER #include - +#include +#include #include namespace log_surgeon::finite_automata { @@ -20,9 +21,7 @@ class Register { [[nodiscard]] auto get_tag() const -> Tag* { return m_tag; } - [[nodiscard]] auto get_last_position() const -> uint32_t { return positions.back(); } - - [[nodiscard]] auto get_all_positions() const -> std::vector const& { + [[nodiscard]] auto get_positions() const -> std::vector const& { return positions; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e911ff58..669af769 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,6 +2,8 @@ set( SOURCES_LOG_SURGEON ../src/log_surgeon/FileReader.cpp ../src/log_surgeon/FileReader.hpp + ../src/log_surgeon/finite_automata/PrefixTree.cpp + ../src/log_surgeon/finite_automata/PrefixTree.hpp ../src/log_surgeon/finite_automata/RegexAST.hpp ../src/log_surgeon/finite_automata/RegexNFA.hpp ../src/log_surgeon/finite_automata/RegexNFAState.hpp @@ -22,7 +24,7 @@ set( ../src/log_surgeon/Token.hpp ) -set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-tag.cpp) +set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-prefix-tree.cpp 
test-register.cpp test-tag.cpp) add_executable(unit-test ${SOURCES_LOG_SURGEON} ${SOURCES_TESTS}) target_link_libraries(unit-test PRIVATE Catch2::Catch2WithMain log_surgeon::log_surgeon) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp new file mode 100644 index 00000000..89d71003 --- /dev/null +++ b/tests/test-prefix-tree.cpp @@ -0,0 +1,36 @@ +#include + +#include + +#include + +using log_surgeon::finite_automata::PrefixTree; + +TEST_CASE("Prefix tree operations", "[PrefixTree]") { + SECTION("Newly constructed tree works correctly") { + PrefixTree const tree; + + REQUIRE(tree.get_reversed_positions(0).empty()); + } + + SECTION("Adding nodes to the prefix tree works correctly") { + PrefixTree tree; + uint32_t index_1 = tree.insert(0, 4); + REQUIRE(std::vector({4}) == tree.get_reversed_positions(index_1)); + + uint32_t index_2 = tree.insert(index_1, 7); + REQUIRE(std::vector({7, 4}) == tree.get_reversed_positions(index_2)); + + uint32_t index_3 = tree.insert(index_2, 9); + REQUIRE(std::vector({9, 7, 4}) == tree.get_reversed_positions(index_3)); + } + + SECTION("Invalid index access throws correctly") { + PrefixTree tree; + REQUIRE_THROWS_AS(tree.get_reversed_positions(1), std::invalid_argument); + + tree.insert(0, 4); + REQUIRE_THROWS_AS(tree.get_reversed_positions(2), std::invalid_argument); + REQUIRE_THROWS_AS(tree.get_reversed_positions(3), std::invalid_argument); + } +} diff --git a/tests/test-register.cpp b/tests/test-register.cpp new file mode 100644 index 00000000..10b9c0a8 --- /dev/null +++ b/tests/test-register.cpp @@ -0,0 +1,19 @@ +#include + +#include + +#include +#include + +using log_surgeon::finite_automata::Register; +using log_surgeon::finite_automata::Tag; +using std::make_unique; +using std::unique_ptr; + +TEST_CASE("Register operations", "[Register]") { + SECTION("Basic tag retrieval works correctly") { + auto const tag = make_unique("uID"); + Register const reg(tag.get()); + REQUIRE(tag.get() == reg.get_tag()); + } +} From 
336f2ae6e426b3bfff6e29617ab16f044b0880af Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Nov 2024 11:04:42 -0500 Subject: [PATCH 064/144] Finished with initial register implementation. --- CMakeLists.txt | 2 +- .../finite_automata/PrefixTree.cpp | 2 +- .../finite_automata/PrefixTree.hpp | 20 +++- src/log_surgeon/finite_automata/Register.hpp | 34 ------ .../finite_automata/RegisterHandler.hpp | 112 ++++++++++++++++++ tests/CMakeLists.txt | 4 +- tests/test-prefix-tree.cpp | 22 +++- tests/test-register-handler.cpp | 66 +++++++++++ tests/test-register.cpp | 19 --- 9 files changed, 219 insertions(+), 62 deletions(-) delete mode 100644 src/log_surgeon/finite_automata/Register.hpp create mode 100644 src/log_surgeon/finite_automata/RegisterHandler.hpp create mode 100644 tests/test-register-handler.cpp delete mode 100644 tests/test-register.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 93f59208..117cde51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,7 +101,7 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/RegexNFA.hpp src/log_surgeon/finite_automata/RegexNFAState.hpp src/log_surgeon/finite_automata/RegexNFAStateType.hpp - src/log_surgeon/finite_automata/Register.hpp + src/log_surgeon/finite_automata/RegisterHandler.hpp src/log_surgeon/finite_automata/Tag.hpp src/log_surgeon/finite_automata/TaggedTransition.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index 6b39b1bf..de52f5be 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -6,7 +6,7 @@ namespace log_surgeon::finite_automata { [[nodiscard]] auto PrefixTree::get_reversed_positions(uint32_t const index ) const -> std::vector { if (m_nodes.size() <= index) { - throw std::invalid_argument("Prefix tree index out-of-bounds."); + throw std::out_of_range("Prefix tree index out-of-bounds."); } std::vector 
reversed_positions; diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index dd6b1229..09adc915 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -19,11 +19,13 @@ namespace log_surgeon::finite_automata { class PrefixTreeNode { public: PrefixTreeNode(uint32_t const predecessor_index, int32_t const position) - : m_predecessor_index(predecessor_index), - m_position(position) {} + : m_predecessor_index{predecessor_index}, + m_position{position} {} [[nodiscard]] auto get_predecessor_index() const -> uint32_t { return m_predecessor_index; } + auto set_position(int32_t const position) -> void { m_position = position; } + [[nodiscard]] auto get_position() const -> int32_t { return m_position; } private: @@ -50,10 +52,24 @@ class PrefixTree { return m_nodes.size() - 1; } + /** + * @param index + * @param position + * @throw std::out_of_range("Prefix tree index out-of-bounds."); + */ + auto set(uint32_t const index, int32_t const position) -> void { + if (m_nodes.size() <= index) { + throw std::out_of_range("Prefix tree index out-of-bounds"); + } + + m_nodes[index].set_position(position); + } + /** * @param index Representing the leaf node of the register's sub-tree. * @return The positions, in reverse order, at which the register places the tag in the * lexed string. 
+ * @throw std::out_of_range("Prefix tree index out-of-bounds."); */ [[nodiscard]] auto get_reversed_positions(uint32_t index) const -> std::vector; diff --git a/src/log_surgeon/finite_automata/Register.hpp b/src/log_surgeon/finite_automata/Register.hpp deleted file mode 100644 index fddb28eb..00000000 --- a/src/log_surgeon/finite_automata/Register.hpp +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER -#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER - -#include -#include -#include -#include - -namespace log_surgeon::finite_automata { -class Register { -public: - explicit Register(Tag* tag) : m_tag{tag} {} - - auto add_pos(uint32_t const pos) -> void { positions.push_back(pos); } - - auto update_last_position(uint32_t const pos) -> void { positions.back() = pos; } - - auto negate_last_position() -> void { positions.pop_back(); } - - auto negate_all_positions() -> void { positions.clear(); } - - [[nodiscard]] auto get_tag() const -> Tag* { return m_tag; } - - [[nodiscard]] auto get_positions() const -> std::vector const& { - return positions; - } - -private: - Tag* m_tag; - std::vector positions; -}; -} // namespace log_surgeon::finite_automata - -#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp new file mode 100644 index 00000000..52d464a8 --- /dev/null +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -0,0 +1,112 @@ +#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER +#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER + +#include +#include +#include + +#include + +namespace log_surgeon::finite_automata { +/** + * A register stores an index in the prefix tree. The index node fully represents the register's + * history. + * + * Note: history refers to the previous tag locations. E.g., given the tagged regex "aaa(1\d2)+", + * after parsing input string "aaa123", a register representing tag 1 would contain the history + * {3,4,5}. 
+ */ +class Register { +public: + explicit Register(uint32_t const index) : m_index{index} {} + + auto set_index(uint32_t const index) -> void { m_index = index; } + + [[nodiscard]] auto get_index() const -> uint32_t { return m_index; } + +private: + uint32_t m_index; +}; + +/** + * The register handler maintains a prefix tree that is sufficient to reperesent all registers. + * The register handler also contains a vector of registers, and performs the set, copy, and append + * operations for these registers. + * + * Note: for efficiency these registers may be re-used, but are not required to be re-initialized. + * It is the responsibility of the DFA to set the register value when needed. + */ +class RegisterHandler { +public: + void add_register(uint32_t const predecessor_index, int32_t const position) { + auto const index = prefix_tree.insert(predecessor_index, position); + m_registers.emplace_back(index); + } + + /** + * + * @param register_index + * @param position + * @throws std::out_of_range("Register index out-of-bounds") + */ + void set_register(uint32_t const register_index, int32_t const position) { + if (m_registers.size() <= register_index) { + throw std::out_of_range("Register index out-of-bounds"); + } + + auto const tree_index = m_registers[register_index].get_index(); + prefix_tree.set(tree_index, position); + } + + /** + * @param dest_register_index + * @param source_register_index + * @throws std::out_of_range("Register index out-of-bounds") + */ + void copy_register(uint32_t const dest_register_index, uint32_t const source_register_index) { + if (m_registers.size() <= source_register_index + || m_registers.size() <= dest_register_index) + { + throw std::out_of_range("Register index out of range"); + } + + m_registers[dest_register_index] = m_registers[source_register_index]; + } + + /** + * @param register_index + * @param position + * @throws std::out_of_range("Register index out-of-bounds") + */ + void append_position(uint32_t register_index, 
int32_t position) { + if (register_index >= m_registers.size()) { + throw std::out_of_range("Register index out of range"); + } + + uint32_t const tree_index = m_registers[register_index].get_index(); + auto const new_index = prefix_tree.insert(tree_index, position); + m_registers[register_index].set_index(new_index); + } + + /** + * @param register_index + * @return Vector of positions representing the history of the given register. + * @throws std::out_of_range("Register index out-of-bounds") + + */ + [[nodiscard]] auto get_reversed_positions(uint32_t const register_index) const -> std::vector { + if (register_index >= m_registers.size()) { + throw std::out_of_range("Register index out of range"); + } + + uint32_t const tree_index = m_registers[register_index].get_index(); + return prefix_tree.get_reversed_positions(tree_index); + } + +private: + PrefixTree prefix_tree; + std::vector m_registers; +}; +} // namespace log_surgeon::finite_automata + +#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 669af769..ec974e6b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,7 +8,7 @@ set( ../src/log_surgeon/finite_automata/RegexNFA.hpp ../src/log_surgeon/finite_automata/RegexNFAState.hpp ../src/log_surgeon/finite_automata/RegexNFAStateType.hpp - ../src/log_surgeon/finite_automata/Register.hpp + ../src/log_surgeon/finite_automata/RegisterHandler.hpp ../src/log_surgeon/finite_automata/Tag.hpp ../src/log_surgeon/finite_automata/TaggedTransition.hpp ../src/log_surgeon/LALR1Parser.cpp @@ -24,7 +24,7 @@ set( ../src/log_surgeon/Token.hpp ) -set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-prefix-tree.cpp test-register.cpp test-tag.cpp) +set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-prefix-tree.cpp test-register-handler.cpp test-tag.cpp) add_executable(unit-test ${SOURCES_LOG_SURGEON} ${SOURCES_TESTS}) target_link_libraries(unit-test PRIVATE Catch2::Catch2WithMain log_surgeon::log_surgeon) diff 
--git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 89d71003..5d143f97 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -27,10 +27,26 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { SECTION("Invalid index access throws correctly") { PrefixTree tree; - REQUIRE_THROWS_AS(tree.get_reversed_positions(1), std::invalid_argument); + REQUIRE_THROWS_AS(tree.get_reversed_positions(1), std::out_of_range); tree.insert(0, 4); - REQUIRE_THROWS_AS(tree.get_reversed_positions(2), std::invalid_argument); - REQUIRE_THROWS_AS(tree.get_reversed_positions(3), std::invalid_argument); + REQUIRE_THROWS_AS(tree.get_reversed_positions(2), std::out_of_range); + REQUIRE_THROWS_AS(tree.get_reversed_positions(3), std::out_of_range); + } + + SECTION("Set position for a valid index works correctly") { + PrefixTree tree; + uint32_t index_1 = tree.insert(0, 4); + tree.set(index_1, 10); + REQUIRE(tree.get_reversed_positions(index_1) == std::vector({10})); + + uint32_t index_2 = tree.insert(index_1, 7); + tree.set(index_2, 12); + REQUIRE(tree.get_reversed_positions(index_2) == std::vector({12, 10})); + } + + SECTION("Set position for an invalid index throws correctly") { + PrefixTree tree; + REQUIRE_THROWS_AS(tree.set(100, 20), std::out_of_range); } } diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp new file mode 100644 index 00000000..d5b6d75a --- /dev/null +++ b/tests/test-register-handler.cpp @@ -0,0 +1,66 @@ +#include + +#include + +#include +#include + +using log_surgeon::finite_automata::Register; +using log_surgeon::finite_automata::RegisterHandler; +using log_surgeon::finite_automata::Tag; +using std::make_unique; +using std::unique_ptr; + +TEST_CASE("Register operations", "[Register]") { + SECTION("Register constructor and getter initializes correctly") { + Register const reg(5); + REQUIRE(reg.get_index() == 5); + } + + SECTION("Register sets index correctly") { + Register reg(5); + reg.set_index(10); + 
REQUIRE(reg.get_index() == 10); + } +} + +TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { + RegisterHandler handler; + + // This example will have 5 registers each be the next's predecessor. Example tagged regex that + // may lead to this would be "(((((1a)+a)+a)+a)+a)+", whereeach regex represents 1 at a + // different layer of repetition. + constexpr uint32_t num_registers = 5; + for (uint32_t i = 0; i < num_registers; i++) { + handler.add_register(i, 0); + } + + SECTION("Set register position correctly") { + handler.set_register(0, 5); + REQUIRE(std::vector{{5}} == handler.get_reversed_positions(0)); + handler.set_register(0, 10); + REQUIRE(std::vector{{10}} == handler.get_reversed_positions(0)); + handler.set_register(1, 15); + REQUIRE(std::vector{{15, 10}} == handler.get_reversed_positions(1)); + } + + SECTION("Copy register index correctly") { + handler.set_register(0, 5); + handler.copy_register(1, 0); + REQUIRE(std::vector{{5}} == handler.get_reversed_positions(1)); + } + + SECTION("append_position appends position correctly") { + handler.set_register(0, 5); + handler.append_position(0, 7); + REQUIRE(std::vector{{7, 5}} == handler.get_reversed_positions(0)); + } + + SECTION("Throws out-of-bounds correctly") { + REQUIRE_THROWS_AS(handler.set_register(10, 5), std::out_of_range); + REQUIRE_THROWS_AS(handler.copy_register(10, 1), std::out_of_range); + REQUIRE_THROWS_AS(handler.copy_register(0, 10), std::out_of_range); + REQUIRE_THROWS_AS(handler.append_position(10, 5), std::out_of_range); + REQUIRE_THROWS_AS(handler.get_reversed_positions(10), std::out_of_range); + } +} diff --git a/tests/test-register.cpp b/tests/test-register.cpp deleted file mode 100644 index 10b9c0a8..00000000 --- a/tests/test-register.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include - -#include - -#include -#include - -using log_surgeon::finite_automata::Register; -using log_surgeon::finite_automata::Tag; -using std::make_unique; -using std::unique_ptr; - -TEST_CASE("Register 
operations", "[Register]") { - SECTION("Basic tag retrieval works correctly") { - auto const tag = make_unique("uID"); - Register const reg(tag.get()); - REQUIRE(tag.get() == reg.get_tag()); - } -} From 3449df26b2a3f4f87555c5693d9d190302876c29 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Nov 2024 11:11:52 -0500 Subject: [PATCH 065/144] Linter. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 09adc915..986d4a83 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -53,7 +53,7 @@ class PrefixTree { } /** - * @param index + * @param index * @param position * @throw std::out_of_range("Prefix tree index out-of-bounds."); */ @@ -61,7 +61,7 @@ class PrefixTree { if (m_nodes.size() <= index) { throw std::out_of_range("Prefix tree index out-of-bounds"); } - + m_nodes[index].set_position(position); } From ef62df17a382ab352f29877bef795a1cc9568c2e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Nov 2024 11:13:38 -0500 Subject: [PATCH 066/144] Linter. 
--- src/log_surgeon/finite_automata/PrefixTree.cpp | 2 +- src/log_surgeon/finite_automata/RegisterHandler.hpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index de52f5be..b3296953 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -11,7 +11,7 @@ namespace log_surgeon::finite_automata { std::vector reversed_positions; auto current_index = index; - while(0 < current_index) { + while (0 < current_index) { auto const& current_node = m_nodes[current_index]; reversed_positions.push_back(current_node.get_position()); current_index = current_node.get_predecessor_index(); diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 52d464a8..7ddfa573 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -94,7 +94,8 @@ class RegisterHandler { * @throws std::out_of_range("Register index out-of-bounds") */ - [[nodiscard]] auto get_reversed_positions(uint32_t const register_index) const -> std::vector { + [[nodiscard]] auto get_reversed_positions(uint32_t const register_index + ) const -> std::vector { if (register_index >= m_registers.size()) { throw std::out_of_range("Register index out of range"); } From a0856501b4eaad56af4c56f432aeadaef6ec53d5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:04:15 -0500 Subject: [PATCH 067/144] Docstring fixes. 
--- src/log_surgeon/finite_automata/PrefixTree.hpp | 9 +++++---- src/log_surgeon/finite_automata/RegisterHandler.hpp | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 986d4a83..2698fed8 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -66,10 +66,11 @@ class PrefixTree { } /** - * @param index Representing the leaf node of the register's sub-tree. - * @return The positions, in reverse order, at which the register places the tag in the - * lexed string. - * @throw std::out_of_range("Prefix tree index out-of-bounds."); + * Retrieves a vector of positions in reverse order by traversing from the given index to the + * root. + * @param index The index of the node to start the tarversal from. + * @return A vector containing positions in reverse order from the given index to root. + * @throw std::out_of_range if the index is out of bounds */ [[nodiscard]] auto get_reversed_positions(uint32_t index) const -> std::vector; diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 7ddfa573..fdbd9052 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -47,7 +47,7 @@ class RegisterHandler { * * @param register_index * @param position - * @throws std::out_of_range("Register index out-of-bounds") + * @throw std::out_of_range if the register index is out of bounds */ void set_register(uint32_t const register_index, int32_t const position) { if (m_registers.size() <= register_index) { @@ -61,7 +61,7 @@ class RegisterHandler { /** * @param dest_register_index * @param source_register_index - * @throws std::out_of_range("Register index out-of-bounds") + * @throw std::out_of_range if the register index is out of bounds */ void copy_register(uint32_t const 
dest_register_index, uint32_t const source_register_index) { if (m_registers.size() <= source_register_index @@ -76,7 +76,7 @@ class RegisterHandler { /** * @param register_index * @param position - * @throws std::out_of_range("Register index out-of-bounds") + * @throw std::out_of_range if the register index is out of bounds */ void append_position(uint32_t register_index, int32_t position) { if (register_index >= m_registers.size()) { @@ -91,7 +91,7 @@ class RegisterHandler { /** * @param register_index * @return Vector of positions representing the history of the given register. - * @throws std::out_of_range("Register index out-of-bounds") + * @throw std::out_of_range if the register index is out of bounds */ [[nodiscard]] auto get_reversed_positions(uint32_t const register_index From 2be06c0b47a95803e33fb7bd16bf22b90014dfd9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:07:07 -0500 Subject: [PATCH 068/144] Add boundry test case. --- tests/test-prefix-tree.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 5d143f97..cf5b9304 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -32,6 +32,11 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { tree.insert(0, 4); REQUIRE_THROWS_AS(tree.get_reversed_positions(2), std::out_of_range); REQUIRE_THROWS_AS(tree.get_reversed_positions(3), std::out_of_range); + + REQUIRE_THROWS_AS( + tree.get_reversed_positions(std::numeric_limits::max()), + std::out_of_range + ); } SECTION("Set position for a valid index works correctly") { From 9ec01dd8478d2972a5b88e13752afd57819004e1 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:17:39 -0500 Subject: [PATCH 069/144] Improve test cases for setting positions in prefix tree. 
--- tests/test-prefix-tree.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index cf5b9304..415a252d 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -41,13 +41,25 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { SECTION("Set position for a valid index works correctly") { PrefixTree tree; - uint32_t index_1 = tree.insert(0, 4); - tree.set(index_1, 10); - REQUIRE(tree.get_reversed_positions(index_1) == std::vector({10})); + // Test updates to different nodes + uint32_t index_1 = tree.insert(0, 4); uint32_t index_2 = tree.insert(index_1, 7); + tree.set(index_1, 10); tree.set(index_2, 12); + REQUIRE(tree.get_reversed_positions(index_1) == std::vector({10})); REQUIRE(tree.get_reversed_positions(index_2) == std::vector({12, 10})); + + // Test multiple updates to the same node + tree.set(index_2, 15); + tree.set(index_2, 20); + REQUIRE(tree.get_reversed_positions(index_2) == std::vector({20, 10})); + + // Test that updates don't affect unrelated paths + uint32_t index_3 = tree.insert(0, 30); + tree.set(index_3, 25); + REQUIRE(tree.get_reversed_positions(index_1) == std::vector({10})); + REQUIRE(tree.get_reversed_positions(index_2) == std::vector({20, 10})); } SECTION("Set position for an invalid index throws correctly") { From 019e675648953df85cac9fed297baa1ea360a33b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:21:05 -0500 Subject: [PATCH 070/144] Improve test cases for setting invalid positions in prefix tree. 
--- tests/test-prefix-tree.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 415a252d..6b97f9a7 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -64,6 +64,12 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { SECTION("Set position for an invalid index throws correctly") { PrefixTree tree; + + // Test setting position before any insertions REQUIRE_THROWS_AS(tree.set(100, 20), std::out_of_range); + + // Test setting position just beyond valid range + uint32_t index_1 = tree.insert(0, 4); + REQUIRE_THROWS_AS(tree.set(index_1 + 1, 20), std::out_of_range); } } From 83a411a4dc807b58bd637dc788a086d6ba753b15 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:22:44 -0500 Subject: [PATCH 071/144] Remove confusing description; Remove unused include. --- tests/test-register-handler.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index d5b6d75a..9d691c84 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -3,11 +3,9 @@ #include #include -#include using log_surgeon::finite_automata::Register; using log_surgeon::finite_automata::RegisterHandler; -using log_surgeon::finite_automata::Tag; using std::make_unique; using std::unique_ptr; @@ -27,9 +25,6 @@ TEST_CASE("Register operations", "[Register]") { TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { RegisterHandler handler; - // This example will have 5 registers each be the next's predecessor. Example tagged regex that - // may lead to this would be "(((((1a)+a)+a)+a)+a)+", whereeach regex represents 1 at a - // different layer of repetition. 
constexpr uint32_t num_registers = 5; for (uint32_t i = 0; i < num_registers; i++) { handler.add_register(i, 0); From c88fbb59f5e715420731b5bb62972a2046516ff9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:27:00 -0500 Subject: [PATCH 072/144] Add edge case test to register unit-tests. --- tests/test-register-handler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 9d691c84..16be1217 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -20,6 +20,14 @@ TEST_CASE("Register operations", "[Register]") { reg.set_index(10); REQUIRE(reg.get_index() == 10); } + + SECTION("Register handles edge cases correctly") { + Register reg(-1); + REQUIRE(reg.get_index() == -1); + + reg.set_index(std::numeric_limits::max()); + REQUIRE(reg.get_index() == std::numeric_limits::max()); + } } TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { From 7c91ddc4c925cd87b922cda9a1a6fbb6c787712d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:27:26 -0500 Subject: [PATCH 073/144] Update docstring for PrefixTreeNode. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 2698fed8..4c3ee8af 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -13,8 +13,7 @@ namespace log_surgeon::finite_automata { * was matched in the lexxed string, as well as the index of the prefix tree node that stores the * previous time the tag was matched. * - * Note: m_position is -1 when a tag is - * unmatched. + * Note: m_position is -1 to indicate that a tag is currently unmatched in the lexed string. 
*/ class PrefixTreeNode { public: From 4c507695bfa49ab29a38774eac9e80429780d935 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:39:30 -0500 Subject: [PATCH 074/144] Add comments to test-case; Add new test case for setting root value. --- tests/test-prefix-tree.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 6b97f9a7..c92f21da 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -10,6 +10,7 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { SECTION("Newly constructed tree works correctly") { PrefixTree const tree; + // A newly constructed tree should return no positions as the root node is ignored REQUIRE(tree.get_reversed_positions(0).empty()); } @@ -41,6 +42,8 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { SECTION("Set position for a valid index works correctly") { PrefixTree tree; + // Test that you can set the root node for sanity, although this value is not used + tree.set(0, 10); // Test updates to different nodes uint32_t index_1 = tree.insert(0, 4); From 98200b47702c9992fbcd75a1b29da8d283464c00 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:46:40 -0500 Subject: [PATCH 075/144] Update docstring to make it clear that any negative value of m_position is for unmatched tags. This makes it better defined what any assigned value means. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 4c3ee8af..46870409 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -13,7 +13,7 @@ namespace log_surgeon::finite_automata { * was matched in the lexxed string, as well as the index of the prefix tree node that stores the * previous time the tag was matched. 
* - * Note: m_position is -1 to indicate that a tag is currently unmatched in the lexed string. + * Note: m_position < 0 indicates that a tag is currently unmatched in the lexed string. */ class PrefixTreeNode { public: From afaf01aa8b2e9a66c27fa33cd3757f3e8c0c5ed0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:52:46 -0500 Subject: [PATCH 076/144] Fix header gaurd. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index fdbd9052..c9fbe753 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER -#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER +#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER +#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER #include #include @@ -110,4 +110,4 @@ class RegisterHandler { }; } // namespace log_surgeon::finite_automata -#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER +#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER From 8dea4769881e729870c9de74fec733927d38acfb Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:53:09 -0500 Subject: [PATCH 077/144] Fix typo. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index c9fbe753..cf785646 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -29,7 +29,7 @@ class Register { }; /** - * The register handler maintains a prefix tree that is sufficient to reperesent all registers. + * The register handler maintains a prefix tree that is sufficient to represent all registers. 
* The register handler also contains a vector of registers, and performs the set, copy, and append * operations for these registers. * From dbb1e164bba46251e5f9ed8e72428a069fd06077 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:53:49 -0500 Subject: [PATCH 078/144] Remove newline in docstring. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index cf785646..b7880e9d 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -44,7 +44,6 @@ class RegisterHandler { } /** - * * @param register_index * @param position * @throw std::out_of_range if the register index is out of bounds From e0548255ec8f2d245af760268d9e0b21d91d4067 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 15:54:51 -0500 Subject: [PATCH 079/144] Improve throw consistency. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index b7880e9d..28f9960c 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -50,7 +50,7 @@ class RegisterHandler { */ void set_register(uint32_t const register_index, int32_t const position) { if (m_registers.size() <= register_index) { - throw std::out_of_range("Register index out-of-bounds"); + throw std::out_of_range("Register index out of range"); } auto const tree_index = m_registers[register_index].get_index(); From 792ce9618c90f0015b1b5f17d741ed5da42adee6 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:01:55 -0500 Subject: [PATCH 080/144] Update prefix tree insertion test cases. 
--- tests/test-prefix-tree.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index c92f21da..ef2a1882 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -14,16 +14,26 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { REQUIRE(tree.get_reversed_positions(0).empty()); } - SECTION("Adding nodes to the prefix tree works correctly") { + SECTION("Inserting nodes into the prefix tree works correctly") { PrefixTree tree; - uint32_t index_1 = tree.insert(0, 4); - REQUIRE(std::vector({4}) == tree.get_reversed_positions(index_1)); + // Test basic insertions + uint32_t index_1 = tree.insert(0, 4); uint32_t index_2 = tree.insert(index_1, 7); - REQUIRE(std::vector({7, 4}) == tree.get_reversed_positions(index_2)); - uint32_t index_3 = tree.insert(index_2, 9); - REQUIRE(std::vector({9, 7, 4}) == tree.get_reversed_positions(index_3)); + REQUIRE(std::vector{4} == tree.get_reversed_positions(index_1)); + REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(index_2)); + REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(index_3)); + + // Test insertion with large position values + uint32_t index_4 = tree.insert(0, std::numeric_limits::max()); + REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(index_4)[0]); + + // Test insertion with negative position values + uint32_t index_5 = tree.insert(0, -1); + uint32_t index_6 = tree.insert(index_5, -100); + REQUIRE(std::vector{-1} == tree.get_reversed_positions(index_5)); + REQUIRE(std::vector{-1, -100} == tree.get_reversed_positions(index_6)); } SECTION("Invalid index access throws correctly") { From cab6e811f06c4d5ed5f3ffb0a9d3b203b888f3b5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:02:20 -0500 Subject: [PATCH 081/144] Fix test case. 
--- tests/test-prefix-tree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index ef2a1882..2d8822dc 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -33,7 +33,7 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { uint32_t index_5 = tree.insert(0, -1); uint32_t index_6 = tree.insert(index_5, -100); REQUIRE(std::vector{-1} == tree.get_reversed_positions(index_5)); - REQUIRE(std::vector{-1, -100} == tree.get_reversed_positions(index_6)); + REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(index_6)); } SECTION("Invalid index access throws correctly") { From ffda5e64e25c381161f28a16667f6c5271685ac7 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:21:34 -0500 Subject: [PATCH 082/144] Fix @throws doscstring for consistency; Improve insert() docstring. --- src/log_surgeon/finite_automata/PrefixTree.cpp | 2 +- src/log_surgeon/finite_automata/PrefixTree.hpp | 15 +++++++++++---- .../finite_automata/RegisterHandler.hpp | 8 ++++---- tests/test-register-handler.cpp | 2 +- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index b3296953..84feccc5 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -6,7 +6,7 @@ namespace log_surgeon::finite_automata { [[nodiscard]] auto PrefixTree::get_reversed_positions(uint32_t const index ) const -> std::vector { if (m_nodes.size() <= index) { - throw std::out_of_range("Prefix tree index out-of-bounds."); + throw std::out_of_range("Prefix tree index out of range"); } std::vector reversed_positions; diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 46870409..66866901 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ 
b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -44,9 +44,16 @@ class PrefixTree { PrefixTree() : m_nodes{{0, -1}} {} /** - * @return The index of the newly inserted node in the tree. + * @param predecessor_index Index of the inserted node's predecessor in the prefix tree + * @param position The position in the lexed string + * @return The index of the newly inserted node in the tree + * @throw std::out_of_range if the predecessor index is out of range */ uint32_t insert(uint32_t const predecessor_index, int32_t const position) { + if (m_nodes.size() <= predecessor_index) { + throw std::out_of_range("Predecessor index out of range"); + } + m_nodes.emplace_back(predecessor_index, position); return m_nodes.size() - 1; } @@ -54,11 +61,11 @@ class PrefixTree { /** * @param index * @param position - * @throw std::out_of_range("Prefix tree index out-of-bounds."); + * @throw std::out_of_range if prefix tree index is out of range */ auto set(uint32_t const index, int32_t const position) -> void { if (m_nodes.size() <= index) { - throw std::out_of_range("Prefix tree index out-of-bounds"); + throw std::out_of_range("Prefix tree index out of range"); } m_nodes[index].set_position(position); @@ -69,7 +76,7 @@ class PrefixTree { * root. * @param index The index of the node to start the tarversal from. * @return A vector containing positions in reverse order from the given index to root. 
- * @throw std::out_of_range if the index is out of bounds + * @throw std::out_of_range if the index is out of range */ [[nodiscard]] auto get_reversed_positions(uint32_t index) const -> std::vector; diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 28f9960c..1c52fe2c 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -46,7 +46,7 @@ class RegisterHandler { /** * @param register_index * @param position - * @throw std::out_of_range if the register index is out of bounds + * @throw std::out_of_range if the register index is out of range */ void set_register(uint32_t const register_index, int32_t const position) { if (m_registers.size() <= register_index) { @@ -60,7 +60,7 @@ class RegisterHandler { /** * @param dest_register_index * @param source_register_index - * @throw std::out_of_range if the register index is out of bounds + * @throw std::out_of_range if the register index is out of range */ void copy_register(uint32_t const dest_register_index, uint32_t const source_register_index) { if (m_registers.size() <= source_register_index @@ -75,7 +75,7 @@ class RegisterHandler { /** * @param register_index * @param position - * @throw std::out_of_range if the register index is out of bounds + * @throw std::out_of_range if the register index is out of range */ void append_position(uint32_t register_index, int32_t position) { if (register_index >= m_registers.size()) { @@ -90,7 +90,7 @@ class RegisterHandler { /** * @param register_index * @return Vector of positions representing the history of the given register. 
- * @throw std::out_of_range if the register index is out of bounds + * @throw std::out_of_range if the register index is out of range */ [[nodiscard]] auto get_reversed_positions(uint32_t const register_index diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 16be1217..7d9db03f 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -59,7 +59,7 @@ TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { REQUIRE(std::vector{{7, 5}} == handler.get_reversed_positions(0)); } - SECTION("Throws out-of-bounds correctly") { + SECTION("Throws out of range correctly") { REQUIRE_THROWS_AS(handler.set_register(10, 5), std::out_of_range); REQUIRE_THROWS_AS(handler.copy_register(10, 1), std::out_of_range); REQUIRE_THROWS_AS(handler.copy_register(0, 10), std::out_of_range); From ff1167224b1d6deab572519b108b52b0d9de1330 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:28:35 -0500 Subject: [PATCH 083/144] Improve register handler test coverage. 
--- tests/test-register-handler.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 7d9db03f..2876f8d2 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -33,6 +33,10 @@ TEST_CASE("Register operations", "[Register]") { TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { RegisterHandler handler; + SECTION("Initial state is empty") { + REQUIRE_THROWS_AS(handler.get_reversed_positions(0), std::out_of_range); + } + constexpr uint32_t num_registers = 5; for (uint32_t i = 0; i < num_registers; i++) { handler.add_register(i, 0); @@ -40,11 +44,16 @@ TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { SECTION("Set register position correctly") { handler.set_register(0, 5); - REQUIRE(std::vector{{5}} == handler.get_reversed_positions(0)); - handler.set_register(0, 10); - REQUIRE(std::vector{{10}} == handler.get_reversed_positions(0)); - handler.set_register(1, 15); - REQUIRE(std::vector{{15, 10}} == handler.get_reversed_positions(1)); + REQUIRE(std::vector{5} == handler.get_reversed_positions(0)); + } + + SECTION("Register relationships are maintained") { + handler.set_register(0, 5); + handler.set_register(1, 10); + handler.set_register(2, 15); + + auto positions = handler.get_reversed_positions(2); + REQUIRE(std::vector{15, 10, 5} == handler.get_reversed_positions(2)); } SECTION("Copy register index correctly") { From 536b50b3a31c2dcc987889e04178f6340e6291fa Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:43:47 -0500 Subject: [PATCH 084/144] Fix == ordering in test-cases; Fix vector initialization to remove redundant braces. 
--- tests/test-prefix-tree.cpp | 10 +++++----- tests/test-register-handler.cpp | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 2d8822dc..4207eec3 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -60,19 +60,19 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { uint32_t index_2 = tree.insert(index_1, 7); tree.set(index_1, 10); tree.set(index_2, 12); - REQUIRE(tree.get_reversed_positions(index_1) == std::vector({10})); - REQUIRE(tree.get_reversed_positions(index_2) == std::vector({12, 10})); + REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); + REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(index_2)); // Test multiple updates to the same node tree.set(index_2, 15); tree.set(index_2, 20); - REQUIRE(tree.get_reversed_positions(index_2) == std::vector({20, 10})); + REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); // Test that updates don't affect unrelated paths uint32_t index_3 = tree.insert(0, 30); tree.set(index_3, 25); - REQUIRE(tree.get_reversed_positions(index_1) == std::vector({10})); - REQUIRE(tree.get_reversed_positions(index_2) == std::vector({20, 10})); + REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); + REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); } SECTION("Set position for an invalid index throws correctly") { diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 2876f8d2..74294134 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -59,13 +59,13 @@ TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { SECTION("Copy register index correctly") { handler.set_register(0, 5); handler.copy_register(1, 0); - REQUIRE(std::vector{{5}} == handler.get_reversed_positions(1)); + REQUIRE(std::vector{5} == handler.get_reversed_positions(1)); } SECTION("append_position appends position correctly") 
{ handler.set_register(0, 5); handler.append_position(0, 7); - REQUIRE(std::vector{{7, 5}} == handler.get_reversed_positions(0)); + REQUIRE(std::vector{7, 5} == handler.get_reversed_positions(0)); } SECTION("Throws out of range correctly") { From 77c20f7c0b9cfa09c81b547530f8dda34dea4ad8 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:52:55 -0500 Subject: [PATCH 085/144] Add const for consistency. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 1c52fe2c..7840384b 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -77,7 +77,7 @@ class RegisterHandler { * @param position * @throw std::out_of_range if the register index is out of range */ - void append_position(uint32_t register_index, int32_t position) { + void append_position(uint32_t const register_index, int32_t const position) { if (register_index >= m_registers.size()) { throw std::out_of_range("Register index out of range"); } From f43759c449d0a8168af4a3b39faa4e4f0d34135c Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:55:10 -0500 Subject: [PATCH 086/144] Add _HPP to header guards; Remove unused include. 
--- src/log_surgeon/finite_automata/PrefixTree.hpp | 7 +++---- src/log_surgeon/finite_automata/RegisterHandler.hpp | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 66866901..2a4c3d65 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -1,9 +1,8 @@ -#ifndef LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE -#define LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE +#ifndef LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP +#define LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP #include #include -#include #include namespace log_surgeon::finite_automata { @@ -86,4 +85,4 @@ class PrefixTree { } // namespace log_surgeon::finite_automata -#endif // LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE +#endif // LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 7840384b..8e4b7607 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER -#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER +#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP +#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP #include #include @@ -109,4 +109,4 @@ class RegisterHandler { }; } // namespace log_surgeon::finite_automata -#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER +#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP From 01e8881ce445dac67ef146339e1a0a932a4e0856 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 16:59:46 -0500 Subject: [PATCH 087/144] Fix typo. 
--- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 2a4c3d65..5cbf7d15 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -73,7 +73,7 @@ class PrefixTree { /** * Retrieves a vector of positions in reverse order by traversing from the given index to the * root. - * @param index The index of the node to start the tarversal from. + * @param index The index of the node to start the traversal from. * @return A vector containing positions in reverse order from the given index to root. * @throw std::out_of_range if the index is out of range */ From fbb3d362e6c1fe6adae353fa0cfbd40680ae8761 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 17:03:21 -0500 Subject: [PATCH 088/144] Remove blank line. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 8e4b7607..76f6d26d 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -90,8 +90,7 @@ class RegisterHandler { /** * @param register_index * @return Vector of positions representing the history of the given register. - * @throw std::out_of_range if the register index is out of range - + * @throw std::out_of_range if the register index is out of range. */ [[nodiscard]] auto get_reversed_positions(uint32_t const register_index ) const -> std::vector { From e1f2b18ee2469ec81802495d2da504a2ecb6719b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 17:04:36 -0500 Subject: [PATCH 089/144] Rename to m_prefix_tree; Remove unused include. 
--- src/log_surgeon/finite_automata/RegisterHandler.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 76f6d26d..ce222f0c 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -2,7 +2,6 @@ #define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP #include -#include #include #include @@ -39,7 +38,7 @@ class Register { class RegisterHandler { public: void add_register(uint32_t const predecessor_index, int32_t const position) { - auto const index = prefix_tree.insert(predecessor_index, position); + auto const index = m_prefix_tree.insert(predecessor_index, position); m_registers.emplace_back(index); } @@ -54,7 +53,7 @@ class RegisterHandler { } auto const tree_index = m_registers[register_index].get_index(); - prefix_tree.set(tree_index, position); + m_prefix_tree.set(tree_index, position); } /** @@ -83,7 +82,7 @@ class RegisterHandler { } uint32_t const tree_index = m_registers[register_index].get_index(); - auto const new_index = prefix_tree.insert(tree_index, position); + auto const new_index = m_prefix_tree.insert(tree_index, position); m_registers[register_index].set_index(new_index); } @@ -99,11 +98,11 @@ class RegisterHandler { } uint32_t const tree_index = m_registers[register_index].get_index(); - return prefix_tree.get_reversed_positions(tree_index); + return m_prefix_tree.get_reversed_positions(tree_index); } private: - PrefixTree prefix_tree; + PrefixTree m_prefix_tree; std::vector m_registers; }; } // namespace log_surgeon::finite_automata From a51b49d7d565df5355b7b02073a759da5384c180 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 17:08:31 -0500 Subject: [PATCH 090/144] Add param descriptions to docstrings. 
--- .../finite_automata/RegisterHandler.hpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index ce222f0c..8eba80c2 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -43,9 +43,9 @@ class RegisterHandler { } /** - * @param register_index - * @param position - * @throw std::out_of_range if the register index is out of range + * @param register_index The index of the register to set. + * @param position The position value to set in the register. + * @throw std::out_of_range if the register index is out of range. */ void set_register(uint32_t const register_index, int32_t const position) { if (m_registers.size() <= register_index) { @@ -57,9 +57,9 @@ class RegisterHandler { } /** - * @param dest_register_index - * @param source_register_index - * @throw std::out_of_range if the register index is out of range + * @param dest_register_index The index of the destination register. + * @param source_register_index The index of the source register. + * @throw std::out_of_range if the register index is out of range. */ void copy_register(uint32_t const dest_register_index, uint32_t const source_register_index) { if (m_registers.size() <= source_register_index @@ -72,9 +72,9 @@ class RegisterHandler { } /** - * @param register_index - * @param position - * @throw std::out_of_range if the register index is out of range + * @param register_index The index of the register to append to. + * @param position The position to append to the register's history. + * @throw std::out_of_range if the register index is out of range. 
*/ void append_position(uint32_t const register_index, int32_t const position) { if (register_index >= m_registers.size()) { @@ -87,8 +87,8 @@ class RegisterHandler { } /** - * @param register_index - * @return Vector of positions representing the history of the given register. + * @param register_index The index of the register whose positions are retrieved. + * @return A vector of positions representing the history of the given register. * @throw std::out_of_range if the register index is out of range. */ [[nodiscard]] auto get_reversed_positions(uint32_t const register_index From 002577e4d855ea6307c029b33e9be1575feb60c0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 17:11:15 -0500 Subject: [PATCH 091/144] Improve out of range check to be consistent. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 8eba80c2..be655384 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -77,7 +77,7 @@ class RegisterHandler { * @throw std::out_of_range if the register index is out of range. */ void append_position(uint32_t const register_index, int32_t const position) { - if (register_index >= m_registers.size()) { + if (m_registers.size() <= register_index) { throw std::out_of_range("Register index out of range"); } @@ -93,7 +93,7 @@ class RegisterHandler { */ [[nodiscard]] auto get_reversed_positions(uint32_t const register_index ) const -> std::vector { - if (register_index >= m_registers.size()) { + if (m_registers.size() <= register_index) { throw std::out_of_range("Register index out of range"); } From 52a155c4761115fdac5493ccbcd91dbd2bcadabf Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 17:13:35 -0500 Subject: [PATCH 092/144] Update set docstring. 
--- src/log_surgeon/finite_automata/PrefixTree.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 5cbf7d15..8cb98fa3 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -58,9 +58,9 @@ class PrefixTree { } /** - * @param index - * @param position - * @throw std::out_of_range if prefix tree index is out of range + * @param index Index of the node to update. + * @param position New position value to set for the node. + * @throw std::out_of_range if prefix tree index is out of range. */ auto set(uint32_t const index, int32_t const position) -> void { if (m_nodes.size() <= index) { From a6beafcaaf0af4d57ea22d88bedac8866c0a685f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Nov 2024 17:19:28 -0500 Subject: [PATCH 093/144] Punctuate docstrings. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 8cb98fa3..128a112a 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -43,10 +43,10 @@ class PrefixTree { PrefixTree() : m_nodes{{0, -1}} {} /** - * @param predecessor_index Index of the inserted node's predecessor in the prefix tree - * @param position The position in the lexed string - * @return The index of the newly inserted node in the tree - * @throw std::out_of_range if the predecessor index is out of range + * @param predecessor_index Index of the inserted node's predecessor in the prefix tree. + * @param position The position in the lexed string. + * @return The index of the newly inserted node in the tree. + * @throw std::out_of_range if the predecessor index is out of range. 
*/ uint32_t insert(uint32_t const predecessor_index, int32_t const position) { if (m_nodes.size() <= predecessor_index) { @@ -75,7 +75,7 @@ class PrefixTree { * root. * @param index The index of the node to start the traversal from. * @return A vector containing positions in reverse order from the given index to root. - * @throw std::out_of_range if the index is out of range + * @throw std::out_of_range if the index is out of range. */ [[nodiscard]] auto get_reversed_positions(uint32_t index) const -> std::vector; From ec1f7571e7a4ac44b68ecde82a72f26277e08107 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:05:22 -0500 Subject: [PATCH 094/144] Update PrefixTreeNode docstring. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 128a112a..caabdf29 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -8,11 +8,12 @@ namespace log_surgeon::finite_automata { /** - * A prefix tree node helps a register represent a tag by storing the current position where a tag - * was matched in the lexxed string, as well as the index of the prefix tree node that stores the - * previous time the tag was matched. + * Represents a prefix tree node used by a register to track tag matches in a lexed string. + * This node stores the current position where a tag was matched, as well as the index of the prefix + * tree node corresponding to the previous match of the same tag. * - * Note: m_position < 0 indicates that a tag is currently unmatched in the lexed string. + * Note: A value of m_position < 0 indicates that the tag is currently unmatched in the lexed + * string.
*/ class PrefixTreeNode { public: From f35741f954b7b43fc33b5da85c914c61d6da7515 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:09:24 -0500 Subject: [PATCH 095/144] Improve docstring for PrefixTree. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index caabdf29..363eede6 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -33,11 +33,8 @@ class PrefixTreeNode { }; /** - * A prefix tree structure to store positions associated with registers. - * - * PrefixTree stores positions at nodes, and each node can represent a part of a position. - * Multiple positions can be stored at each index in the tree. The tree allows for the addition of - * positions and the retrieval of positions by their associated index. + * A prefix tree for storing registers. + * Each path from the root to an index represents a sequence of matched tag positions. */ class PrefixTree { public: From e8e5e5545a55052ac63e5549340bfa3911a974cf Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:21:01 -0500 Subject: [PATCH 096/144] Change to use auto -> void; Punctuate out_of_range throws. 
--- src/log_surgeon/finite_automata/PrefixTree.cpp | 2 +- src/log_surgeon/finite_automata/PrefixTree.hpp | 6 +++--- .../finite_automata/RegisterHandler.hpp | 17 +++++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index 84feccc5..c92b2a90 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -6,7 +6,7 @@ namespace log_surgeon::finite_automata { [[nodiscard]] auto PrefixTree::get_reversed_positions(uint32_t const index ) const -> std::vector { if (m_nodes.size() <= index) { - throw std::out_of_range("Prefix tree index out of range"); + throw std::out_of_range("Prefix tree index out of range."); } std::vector reversed_positions; diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 363eede6..0bf0eb8c 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -46,9 +46,9 @@ class PrefixTree { * @return The index of the newly inserted node in the tree. * @throw std::out_of_range if the predecessor index is out of range. 
*/ - uint32_t insert(uint32_t const predecessor_index, int32_t const position) { + auto insert(uint32_t const predecessor_index, int32_t const position) -> uint32_t { if (m_nodes.size() <= predecessor_index) { - throw std::out_of_range("Predecessor index out of range"); + throw std::out_of_range("Predecessor index out of range."); } m_nodes.emplace_back(predecessor_index, position); @@ -62,7 +62,7 @@ class PrefixTree { */ auto set(uint32_t const index, int32_t const position) -> void { if (m_nodes.size() <= index) { - throw std::out_of_range("Prefix tree index out of range"); + throw std::out_of_range("Prefix tree index out of range."); } m_nodes[index].set_position(position); diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index be655384..53b2882a 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -37,7 +37,7 @@ class Register { */ class RegisterHandler { public: - void add_register(uint32_t const predecessor_index, int32_t const position) { + auto add_register(uint32_t const predecessor_index, int32_t const position) -> void { auto const index = m_prefix_tree.insert(predecessor_index, position); m_registers.emplace_back(index); } @@ -47,9 +47,9 @@ class RegisterHandler { * @param position The position value to set in the register. * @throw std::out_of_range if the register index is out of range. */ - void set_register(uint32_t const register_index, int32_t const position) { + auto set_register(uint32_t const register_index, int32_t const position) -> void { if (m_registers.size() <= register_index) { - throw std::out_of_range("Register index out of range"); + throw std::out_of_range("Register index out of range."); } auto const tree_index = m_registers[register_index].get_index(); @@ -61,11 +61,12 @@ class RegisterHandler { * @param source_register_index The index of the source register. 
* @throw std::out_of_range if the register index is out of range. */ - void copy_register(uint32_t const dest_register_index, uint32_t const source_register_index) { + auto copy_register(uint32_t const dest_register_index, uint32_t const source_register_index) + -> void { if (m_registers.size() <= source_register_index || m_registers.size() <= dest_register_index) { - throw std::out_of_range("Register index out of range"); + throw std::out_of_range("Register index out of range."); } m_registers[dest_register_index] = m_registers[source_register_index]; @@ -76,9 +77,9 @@ class RegisterHandler { * @param position The position to append to the register's history. * @throw std::out_of_range if the register index is out of range. */ - void append_position(uint32_t const register_index, int32_t const position) { + auto append_position(uint32_t const register_index, int32_t const position) -> void { if (m_registers.size() <= register_index) { - throw std::out_of_range("Register index out of range"); + throw std::out_of_range("Register index out of range."); } uint32_t const tree_index = m_registers[register_index].get_index(); @@ -94,7 +95,7 @@ class RegisterHandler { [[nodiscard]] auto get_reversed_positions(uint32_t const register_index ) const -> std::vector { if (m_registers.size() <= register_index) { - throw std::out_of_range("Register index out of range"); + throw std::out_of_range("Register index out of range."); } uint32_t const tree_index = m_registers[register_index].get_index(); From f1ece306fb4f05b39aa07699c45fb4ccb5d01468 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:27:58 -0500 Subject: [PATCH 097/144] Update Register docstring. 
--- src/log_surgeon/finite_automata/RegisterHandler.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 53b2882a..5a5bfc60 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -8,12 +8,11 @@ namespace log_surgeon::finite_automata { /** - * A register stores an index in the prefix tree. The index node fully represents the register's - * history. + * Represents a register that tracks a sequence of positions where a tag was matched in a lexed + * string. * - * Note: history refers to the previous tag locations. E.g., given the tagged regex "aaa(1\d2)+", - * after parsing input string "aaa123", a register representing tag 1 would contain the history - * {3,4,5}. + * To improve efficiency, registers are stored in a prefix tree. This class holds only the index + * of the prefix tree node that represents the current state of the register. */ class Register { public: From 08997aeebf5180ee3ca274fc82570465b63e7f3e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:28:59 -0500 Subject: [PATCH 098/144] Update PrefixTree docstring. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 0bf0eb8c..5bc77a56 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -33,7 +33,7 @@ class PrefixTreeNode { }; /** - * A prefix tree for storing registers. + * Represents a prefix tree that stores all data needed by registers. * Each path from the root to an index represents a sequence of matched tag positions. 
*/ class PrefixTree { From 0910c626c606601d8902b5767754428a129544c4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:29:49 -0500 Subject: [PATCH 099/144] Grammar fix. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 5bc77a56..1ea9c7af 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -9,8 +9,8 @@ namespace log_surgeon::finite_automata { /** * Represents a prefix tree node used by a register to track tag matches in a lexed string. - * This node stores the current position where a tag was matched, as well as the index of the prefix - * tree node corresponding to the previous match of the same tag. + * This node stores the current position at which a tag was matched, as well as the index of the + * prefix tree node corresponding to the previous match of the same tag. * * Note: A value of m_position < 0 indicates that the tag is currently unmatched in the lexed * string. From ede680e26e051737341591b8ceba2bfd2a0a6cfb Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:30:48 -0500 Subject: [PATCH 100/144] Grammar fix. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 1ea9c7af..a7e2639b 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -34,6 +34,7 @@ class PrefixTreeNode { /** * Represents a prefix tree that stores all data needed by registers. + * * Each path from the root to an index represents a sequence of matched tag positions. 
*/ class PrefixTree { From c7b047c53a0918dab6ac536f70640883e92373d5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 28 Nov 2024 11:51:59 -0500 Subject: [PATCH 101/144] Use auto where possible. --- .../finite_automata/RegisterHandler.hpp | 4 ++-- tests/test-prefix-tree.cpp | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 5a5bfc60..f701e9a3 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -81,7 +81,7 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - uint32_t const tree_index = m_registers[register_index].get_index(); + auto const tree_index = m_registers[register_index].get_index(); auto const new_index = m_prefix_tree.insert(tree_index, position); m_registers[register_index].set_index(new_index); } @@ -97,7 +97,7 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - uint32_t const tree_index = m_registers[register_index].get_index(); + auto const tree_index = m_registers[register_index].get_index(); return m_prefix_tree.get_reversed_positions(tree_index); } diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 4207eec3..17a41a49 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -18,20 +18,20 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { PrefixTree tree; // Test basic insertions - uint32_t index_1 = tree.insert(0, 4); - uint32_t index_2 = tree.insert(index_1, 7); - uint32_t index_3 = tree.insert(index_2, 9); + auto const index_1 = tree.insert(0, 4); + auto const index_2 = tree.insert(index_1, 7); + auto const index_3 = tree.insert(index_2, 9); REQUIRE(std::vector{4} == tree.get_reversed_positions(index_1)); REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(index_2)); REQUIRE(std::vector{9, 7, 4} == 
tree.get_reversed_positions(index_3)); // Test insertion with large position values - uint32_t index_4 = tree.insert(0, std::numeric_limits::max()); + auto const index_4 = tree.insert(0, std::numeric_limits::max()); REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(index_4)[0]); // Test insertion with negative position values - uint32_t index_5 = tree.insert(0, -1); - uint32_t index_6 = tree.insert(index_5, -100); + auto const index_5 = tree.insert(0, -1); + auto const index_6 = tree.insert(index_5, -100); REQUIRE(std::vector{-1} == tree.get_reversed_positions(index_5)); REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(index_6)); } @@ -56,8 +56,8 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { tree.set(0, 10); // Test updates to different nodes - uint32_t index_1 = tree.insert(0, 4); - uint32_t index_2 = tree.insert(index_1, 7); + auto const index_1 = tree.insert(0, 4); + auto const index_2 = tree.insert(index_1, 7); tree.set(index_1, 10); tree.set(index_2, 12); REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); @@ -69,7 +69,7 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); // Test that updates don't affect unrelated paths - uint32_t index_3 = tree.insert(0, 30); + auto const index_3 = tree.insert(0, 30); tree.set(index_3, 25); REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); @@ -82,7 +82,7 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { REQUIRE_THROWS_AS(tree.set(100, 20), std::out_of_range); // Test setting position just beyond valid range - uint32_t index_1 = tree.insert(0, 4); + auto const index_1 = tree.insert(0, 4); REQUIRE_THROWS_AS(tree.set(index_1 + 1, 20), std::out_of_range); } } From 6fa8fcb3be09cc4490dff4d8fb5ff51792f3e32c Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 10:39:03 -0500 Subject: [PATCH 102/144] 
Use uniform initialization. --- .../finite_automata/PrefixTree.cpp | 4 ++-- .../finite_automata/RegisterHandler.hpp | 10 +++++----- tests/test-prefix-tree.cpp | 20 +++++++++---------- tests/test-register-handler.cpp | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index c92b2a90..0cd0415c 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -10,9 +10,9 @@ namespace log_surgeon::finite_automata { } std::vector reversed_positions; - auto current_index = index; + auto current_index{index}; while (0 < current_index) { - auto const& current_node = m_nodes[current_index]; + auto const& current_node{m_nodes[current_index]}; reversed_positions.push_back(current_node.get_position()); current_index = current_node.get_predecessor_index(); } diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index f701e9a3..4fc864a1 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -37,7 +37,7 @@ class Register { class RegisterHandler { public: auto add_register(uint32_t const predecessor_index, int32_t const position) -> void { - auto const index = m_prefix_tree.insert(predecessor_index, position); + auto const index{m_prefix_tree.insert(predecessor_index, position)}; m_registers.emplace_back(index); } @@ -51,7 +51,7 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - auto const tree_index = m_registers[register_index].get_index(); + auto const tree_index{m_registers[register_index].get_index()}; m_prefix_tree.set(tree_index, position); } @@ -81,8 +81,8 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - auto const tree_index = m_registers[register_index].get_index(); - auto const new_index = 
m_prefix_tree.insert(tree_index, position); + auto const tree_index{m_registers[register_index].get_index()}; + auto const new_index{m_prefix_tree.insert(tree_index, position)}; m_registers[register_index].set_index(new_index); } @@ -97,7 +97,7 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - auto const tree_index = m_registers[register_index].get_index(); + auto const tree_index{m_registers[register_index].get_index()}; return m_prefix_tree.get_reversed_positions(tree_index); } diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 17a41a49..22ad8029 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -18,20 +18,20 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { PrefixTree tree; // Test basic insertions - auto const index_1 = tree.insert(0, 4); - auto const index_2 = tree.insert(index_1, 7); - auto const index_3 = tree.insert(index_2, 9); + auto const index_1{tree.insert(0, 4)}; + auto const index_2{tree.insert(index_1, 7)}; + auto const index_3{tree.insert(index_2, 9)}; REQUIRE(std::vector{4} == tree.get_reversed_positions(index_1)); REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(index_2)); REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(index_3)); // Test insertion with large position values - auto const index_4 = tree.insert(0, std::numeric_limits::max()); + auto const index_4{tree.insert(0, std::numeric_limits::max())}; REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(index_4)[0]); // Test insertion with negative position values - auto const index_5 = tree.insert(0, -1); - auto const index_6 = tree.insert(index_5, -100); + auto const index_5{tree.insert(0, -1)}; + auto const index_6{tree.insert(index_5, -100)}; REQUIRE(std::vector{-1} == tree.get_reversed_positions(index_5)); REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(index_6)); } @@ -56,8 +56,8 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { tree.set(0, 
10); // Test updates to different nodes - auto const index_1 = tree.insert(0, 4); - auto const index_2 = tree.insert(index_1, 7); + auto const index_1{tree.insert(0, 4)}; + auto const index_2{tree.insert(index_1, 7)}; tree.set(index_1, 10); tree.set(index_2, 12); REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); @@ -69,7 +69,7 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); // Test that updates don't affect unrelated paths - auto const index_3 = tree.insert(0, 30); + auto const index_3{tree.insert(0, 30)}; tree.set(index_3, 25); REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); @@ -82,7 +82,7 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { REQUIRE_THROWS_AS(tree.set(100, 20), std::out_of_range); // Test setting position just beyond valid range - auto const index_1 = tree.insert(0, 4); + auto const index_1{tree.insert(0, 4)}; REQUIRE_THROWS_AS(tree.set(index_1 + 1, 20), std::out_of_range); } } diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 74294134..b55bed40 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -37,7 +37,7 @@ TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { REQUIRE_THROWS_AS(handler.get_reversed_positions(0), std::out_of_range); } - constexpr uint32_t num_registers = 5; + constexpr uint32_t num_registers{5}; for (uint32_t i = 0; i < num_registers; i++) { handler.add_register(i, 0); } @@ -52,7 +52,7 @@ TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { handler.set_register(1, 10); handler.set_register(2, 15); - auto positions = handler.get_reversed_positions(2); + auto positions{handler.get_reversed_positions(2)}; REQUIRE(std::vector{15, 10, 5} == handler.get_reversed_positions(2)); } From 18b91604e837dd702701216d1457fddebf923855 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: 
Mon, 2 Dec 2024 10:43:46 -0500 Subject: [PATCH 103/144] Add missing header. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 4fc864a1..a3a5eb56 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -2,6 +2,7 @@ #define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP #include +#include #include #include From 3f08fa3484e588f7d0b064eb0ff37443e569aebf Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 10:44:54 -0500 Subject: [PATCH 104/144] Linter. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index a7e2639b..ed53d835 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -34,7 +34,7 @@ class PrefixTreeNode { /** * Represents a prefix tree that stores all data needed by registers. - * + * * Each path from the root to an index represents a sequence of matched tag positions. */ class PrefixTree { From e281f043528598944a64e4bb202bdf9f16e35fa7 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Mon, 2 Dec 2024 10:45:16 -0500 Subject: [PATCH 105/144] Fix spacing. 
Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/PrefixTree.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index ed53d835..21244eac 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -6,7 +6,6 @@ #include namespace log_surgeon::finite_automata { - /** * Represents a prefix tree node used by a register to track tag matches in a lexed string. * This node stores the current position at which a tag was matched, as well as the index of the From a03734e88c82fbcda477c880f00a379d72a7dfdf Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 10:50:03 -0500 Subject: [PATCH 106/144] Make Node a member of PrefixTree. --- .../finite_automata/PrefixTree.hpp | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 21244eac..0bf01c78 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -7,36 +7,37 @@ namespace log_surgeon::finite_automata { /** - * Represents a prefix tree node used by a register to track tag matches in a lexed string. - * This node stores the current position at which a tag was matched, as well as the index of the - * prefix tree node corresponding to the previous match of the same tag. + * Represents a prefix tree that stores all data needed by the TDFA registers. * - * Note: A value of m_position < 0 indicates that the tag is currently unmatched in the lexed - * string. + * Each path from the root to an index represents a sequence of matched tag positions. 
*/ -class PrefixTreeNode { -public: - PrefixTreeNode(uint32_t const predecessor_index, int32_t const position) - : m_predecessor_index{predecessor_index}, - m_position{position} {} +class PrefixTree { + /** + * Represents a prefix tree node. A node stores a potential value for a TDFA register. + * + * A node stores the current position at which a tag was matched, as well as the index of the + * prefix tree node corresponding to the previous match of the same tag. + * + * Note: A value of m_position < 0 indicates that the tag is currently unmatched in the lexed + * string. + */ + class Node { + public: + Node(uint32_t const predecessor_index, int32_t const position) + : m_predecessor_index{predecessor_index}, + m_position{position} {} - [[nodiscard]] auto get_predecessor_index() const -> uint32_t { return m_predecessor_index; } + [[nodiscard]] auto get_predecessor_index() const -> uint32_t { return m_predecessor_index; } - auto set_position(int32_t const position) -> void { m_position = position; } + auto set_position(int32_t const position) -> void { m_position = position; } - [[nodiscard]] auto get_position() const -> int32_t { return m_position; } + [[nodiscard]] auto get_position() const -> int32_t { return m_position; } -private: - uint32_t m_predecessor_index; - int32_t m_position; -}; + private: + uint32_t m_predecessor_index; + int32_t m_position; + }; -/** - * Represents a prefix tree that stores all data needed by registers. - * - * Each path from the root to an index represents a sequence of matched tag positions. 
- */ -class PrefixTree { public: PrefixTree() : m_nodes{{0, -1}} {} @@ -78,7 +79,7 @@ class PrefixTree { [[nodiscard]] auto get_reversed_positions(uint32_t index) const -> std::vector; private: - std::vector m_nodes; + std::vector m_nodes; }; } // namespace log_surgeon::finite_automata From 9123c7ac191f933241615d3ba96da72220ec1b3e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 10:55:28 -0500 Subject: [PATCH 107/144] Rename index to prefix_tree_node_id. --- .../finite_automata/RegisterHandler.hpp | 17 +++++++++-------- tests/test-register-handler.cpp | 12 ++++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index a3a5eb56..39c17054 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -17,14 +17,15 @@ namespace log_surgeon::finite_automata { */ class Register { public: - explicit Register(uint32_t const index) : m_index{index} {} + explicit Register(uint32_t const prefix_tree_node_id) + : m_prefix_tree_node_id{prefix_tree_node_id} {} - auto set_index(uint32_t const index) -> void { m_index = index; } + auto set_prefix_tree_node_id(uint32_t const index) -> void { m_prefix_tree_node_id = index; } - [[nodiscard]] auto get_index() const -> uint32_t { return m_index; } + [[nodiscard]] auto get_prefix_tree_node_id() const -> uint32_t { return m_prefix_tree_node_id; } private: - uint32_t m_index; + uint32_t m_prefix_tree_node_id; }; /** @@ -52,7 +53,7 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - auto const tree_index{m_registers[register_index].get_index()}; + auto const tree_index{m_registers[register_index].get_prefix_tree_node_id()}; m_prefix_tree.set(tree_index, position); } @@ -82,9 +83,9 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - auto const 
tree_index{m_registers[register_index].get_index()}; + auto const tree_index{m_registers[register_index].get_prefix_tree_node_id()}; auto const new_index{m_prefix_tree.insert(tree_index, position)}; - m_registers[register_index].set_index(new_index); + m_registers[register_index].set_prefix_tree_node_id(new_index); } /** @@ -98,7 +99,7 @@ class RegisterHandler { throw std::out_of_range("Register index out of range."); } - auto const tree_index{m_registers[register_index].get_index()}; + auto const tree_index{m_registers[register_index].get_prefix_tree_node_id()}; return m_prefix_tree.get_reversed_positions(tree_index); } diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index b55bed40..90de0edf 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -12,21 +12,21 @@ using std::unique_ptr; TEST_CASE("Register operations", "[Register]") { SECTION("Register constructor and getter initializes correctly") { Register const reg(5); - REQUIRE(reg.get_index() == 5); + REQUIRE(reg.get_prefix_tree_node_id() == 5); } SECTION("Register sets index correctly") { Register reg(5); - reg.set_index(10); - REQUIRE(reg.get_index() == 10); + reg.set_prefix_tree_node_id(10); + REQUIRE(reg.get_prefix_tree_node_id() == 10); } SECTION("Register handles edge cases correctly") { Register reg(-1); - REQUIRE(reg.get_index() == -1); + REQUIRE(reg.get_prefix_tree_node_id() == -1); - reg.set_index(std::numeric_limits::max()); - REQUIRE(reg.get_index() == std::numeric_limits::max()); + reg.set_prefix_tree_node_id(std::numeric_limits::max()); + REQUIRE(reg.get_prefix_tree_node_id() == std::numeric_limits::max()); } } From fe35fe0cf2759dcd0450f99d56af8fbc3a5c8ce3 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 19:55:10 -0500 Subject: [PATCH 108/144] Make it clear indices in add_register are referring to prefix_tree nodes.
--- src/log_surgeon/finite_automata/RegisterHandler.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 39c17054..c05fb509 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -38,9 +38,9 @@ class Register { */ class RegisterHandler { public: - auto add_register(uint32_t const predecessor_index, int32_t const position) -> void { - auto const index{m_prefix_tree.insert(predecessor_index, position)}; - m_registers.emplace_back(index); + auto add_register(uint32_t const prefix_tree_parent_node_id, int32_t const position) -> void { + auto const prefix_tree_node_id{m_prefix_tree.insert(prefix_tree_parent_node_id, position)}; + m_registers.emplace_back(prefix_tree_node_id); } /** From de58e088669b2700397d1035733571a83e2e2fab Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 19:56:28 -0500 Subject: [PATCH 109/144] Linter. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 0bf01c78..6bae551e 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -14,7 +14,7 @@ namespace log_surgeon::finite_automata { class PrefixTree { /** * Represents a prefix tree node. A node stores a potential value for a TDFA register. - * + * * A node stores the current position at which a tag was matched, as well as the index of the * prefix tree node corresponding to the previous match of the same tag. * From 1426179489108a497594b83f12dd4b98970d98b9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 20:05:02 -0500 Subject: [PATCH 110/144] rename to reg_id. 
--- src/log_surgeon/finite_automata/RegisterHandler.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index c05fb509..f2bc4d30 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -44,16 +44,16 @@ class RegisterHandler { } /** - * @param register_index The index of the register to set. + * @param reg_id The index of the register to set. * @param position The position value to set in the register. * @throw std::out_of_range if the register index is out of range. */ - auto set_register(uint32_t const register_index, int32_t const position) -> void { - if (m_registers.size() <= register_index) { + auto set_register(uint32_t const reg_id, int32_t const position) -> void { + if (m_registers.size() <= reg_id) { throw std::out_of_range("Register index out of range."); } - auto const tree_index{m_registers[register_index].get_prefix_tree_node_id()}; + auto const tree_index{m_registers[reg_id].get_prefix_tree_node_id()}; m_prefix_tree.set(tree_index, position); } From 3301f14a35ca102b4277b6c51fca3990ad7eda73 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 20:12:03 -0500 Subject: [PATCH 111/144] Rename to reg_id. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index f2bc4d30..c9989af4 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -62,15 +62,15 @@ class RegisterHandler { * @param source_register_index The index of the source register. * @throw std::out_of_range if the register index is out of range. 
*/ - auto copy_register(uint32_t const dest_register_index, uint32_t const source_register_index) + auto copy_register(uint32_t const dest_reg_id, uint32_t const source_reg_id) -> void { - if (m_registers.size() <= source_register_index - || m_registers.size() <= dest_register_index) + if (m_registers.size() <= source_reg_id + || m_registers.size() <= dest_reg_id) { throw std::out_of_range("Register index out of range."); } - m_registers[dest_register_index] = m_registers[source_register_index]; + m_registers[dest_reg_id] = m_registers[source_reg_id]; } /** From c9b1369fa312543cef381b3b976279d2672da332 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 20:36:21 -0500 Subject: [PATCH 112/144] Use at(). --- src/log_surgeon/finite_automata/PrefixTree.hpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 6bae551e..f874a2bb 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -62,11 +62,7 @@ class PrefixTree { * @throw std::out_of_range if prefix tree index is out of range. */ auto set(uint32_t const index, int32_t const position) -> void { - if (m_nodes.size() <= index) { - throw std::out_of_range("Prefix tree index out of range."); - } - - m_nodes[index].set_position(position); + m_nodes.at(index).set_position(position); } /** From e2aee661ea58e02abceff79e466b8f27f69f682e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 21:14:35 -0500 Subject: [PATCH 113/144] Remove Register class and use uint32_t instead; Rename vars to xxx_reg_id; Remove error checking in favor of using .at().
--- .../finite_automata/RegisterHandler.hpp | 79 ++----------------- tests/test-register-handler.cpp | 22 ------ 2 files changed, 8 insertions(+), 93 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index c9989af4..e736c26a 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -8,26 +8,6 @@ #include namespace log_surgeon::finite_automata { -/** - * Represents a register that tracks a sequence of positions where a tag was matched in a lexed - * string. - * - * To improve efficiency, registers are stored in a prefix tree. This class holds only the index - * of the prefix tree node that represents the current state of the register. - */ -class Register { -public: - explicit Register(uint32_t const prefix_tree_node_id) - : m_prefix_tree_node_id{prefix_tree_node_id} {} - - auto set_prefix_tree_node_id(uint32_t const index) -> void { m_prefix_tree_node_id = index; } - - [[nodiscard]] auto get_prefix_tree_node_id() const -> uint32_t { return m_prefix_tree_node_id; } - -private: - uint32_t m_prefix_tree_node_id; -}; - /** * The register handler maintains a prefix tree that is sufficient to represent all registers. * The register handler also contains a vector of registers, and performs the set, copy, and append @@ -43,69 +23,26 @@ class RegisterHandler { m_registers.emplace_back(prefix_tree_node_id); } - /** - * @param reg_id The index of the register to set. - * @param position The position value to set in the register. - * @throw std::out_of_range if the register index is out of range. 
- */ auto set_register(uint32_t const reg_id, int32_t const position) -> void { - if (m_registers.size() <= reg_id) { - throw std::out_of_range("Register index out of range."); - } - - auto const tree_index{m_registers[reg_id].get_prefix_tree_node_id()}; - m_prefix_tree.set(tree_index, position); + m_prefix_tree.set(m_registers.at(reg_id), position); } - /** - * @param dest_register_index The index of the destination register. - * @param source_register_index The index of the source register. - * @throw std::out_of_range if the register index is out of range. - */ - auto copy_register(uint32_t const dest_reg_id, uint32_t const source_reg_id) - -> void { - if (m_registers.size() <= source_reg_id - || m_registers.size() <= dest_reg_id) - { - throw std::out_of_range("Register index out of range."); - } - - m_registers[dest_reg_id] = m_registers[source_reg_id]; + auto copy_register(uint32_t const dest_reg_id, uint32_t const source_reg_id) -> void { + m_registers.at(dest_reg_id) = m_registers.at(source_reg_id); } - /** - * @param register_index The index of the register to append to. - * @param position The position to append to the register's history. - * @throw std::out_of_range if the register index is out of range. - */ auto append_position(uint32_t const register_index, int32_t const position) -> void { - if (m_registers.size() <= register_index) { - throw std::out_of_range("Register index out of range."); - } - - auto const tree_index{m_registers[register_index].get_prefix_tree_node_id()}; - auto const new_index{m_prefix_tree.insert(tree_index, position)}; - m_registers[register_index].set_prefix_tree_node_id(new_index); + auto& reg{m_registers.at(register_index)}; + reg = m_prefix_tree.insert(reg, position); } - /** - * @param register_index The index of the register whose positions are retrieved. - * @return A vector of positions representing the history of the given register. - * @throw std::out_of_range if the register index is out of range. 
- */ - [[nodiscard]] auto get_reversed_positions(uint32_t const register_index - ) const -> std::vector { - if (m_registers.size() <= register_index) { - throw std::out_of_range("Register index out of range."); - } - - auto const tree_index{m_registers[register_index].get_prefix_tree_node_id()}; - return m_prefix_tree.get_reversed_positions(tree_index); + [[nodiscard]] auto get_reversed_positions(uint32_t const reg_id) const -> std::vector { + return m_prefix_tree.get_reversed_positions(m_registers.at(reg_id)); } private: PrefixTree m_prefix_tree; - std::vector m_registers; + std::vector m_registers; }; } // namespace log_surgeon::finite_automata diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 90de0edf..67fa3d95 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -4,32 +4,10 @@ #include -using log_surgeon::finite_automata::Register; using log_surgeon::finite_automata::RegisterHandler; using std::make_unique; using std::unique_ptr; -TEST_CASE("Register operations", "[Register]") { - SECTION("Register constructor and getter initializes correctly") { - Register const reg(5); - REQUIRE(reg.get_prefix_tree_node_id() == 5); - } - - SECTION("Register sets index correctly") { - Register reg(5); - reg.set_prefix_tree_node_id(10); - REQUIRE(reg.get_prefix_tree_node_id() == 10); - } - - SECTION("Register handles edge cases correctly") { - Register reg(-1); - REQUIRE(reg.get_prefix_tree_node_id() == -1); - - reg.set_prefix_tree_node_id(std::numeric_limits::max()); - REQUIRE(reg.get_prefix_tree_node_id() == std::numeric_limits::max()); - } -} - TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { RegisterHandler handler; From 36c1810779b2e7ee404a3999130acf632170b036 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 21:20:41 -0500 Subject: [PATCH 114/144] Rename to reg_id. 
--- src/log_surgeon/finite_automata/RegisterHandler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index e736c26a..e78ba58f 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -31,8 +31,8 @@ class RegisterHandler { m_registers.at(dest_reg_id) = m_registers.at(source_reg_id); } - auto append_position(uint32_t const register_index, int32_t const position) -> void { - auto& reg{m_registers.at(register_index)}; + auto append_position(uint32_t const reg_id, int32_t const position) -> void { + auto& reg{m_registers.at(reg_id)}; reg = m_prefix_tree.insert(reg, position); } From 48df8b0ff25d7f2ea77a5cdce81d2bb7d87c62a2 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 21:22:11 -0500 Subject: [PATCH 115/144] Remove unused header. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index e78ba58f..feb9f83b 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -2,7 +2,6 @@ #define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP #include -#include #include #include From a8605fc8fbb589147165875c717a4661fcecb295 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 21:43:21 -0500 Subject: [PATCH 116/144] Change pred index to be optional and nullopt for root. 
--- src/log_surgeon/finite_automata/PrefixTree.cpp | 7 +++---- src/log_surgeon/finite_automata/PrefixTree.hpp | 12 +++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index 0cd0415c..a57bac04 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -10,11 +10,10 @@ namespace log_surgeon::finite_automata { } std::vector reversed_positions; - auto current_index{index}; - while (0 < current_index) { - auto const& current_node{m_nodes[current_index]}; + auto current_node{m_nodes[index]}; + while (current_node.get_predecessor_index().has_value()) { reversed_positions.push_back(current_node.get_position()); - current_index = current_node.get_predecessor_index(); + current_node = m_nodes[current_node.get_predecessor_index().value()]; } return reversed_positions; } diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index f874a2bb..6a84bf2a 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -2,6 +2,7 @@ #define LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP #include +#include #include #include @@ -23,23 +24,25 @@ class PrefixTree { */ class Node { public: - Node(uint32_t const predecessor_index, int32_t const position) + Node(std::optional const predecessor_index, int32_t const position) : m_predecessor_index{predecessor_index}, m_position{position} {} - [[nodiscard]] auto get_predecessor_index() const -> uint32_t { return m_predecessor_index; } + [[nodiscard]] auto get_predecessor_index() const -> std::optional { + return m_predecessor_index; + } auto set_position(int32_t const position) -> void { m_position = position; } [[nodiscard]] auto get_position() const -> int32_t { return m_position; } private: - uint32_t m_predecessor_index; + std::optional m_predecessor_index; int32_t 
m_position; }; public: - PrefixTree() : m_nodes{{0, -1}} {} + PrefixTree() : m_nodes{{std::nullopt, -1}} {} /** * @param predecessor_index Index of the inserted node's predecessor in the prefix tree. @@ -59,7 +62,6 @@ class PrefixTree { /** * @param index Index of the node to update. * @param position New position value to set for the node. - * @throw std::out_of_range if prefix tree index is out of range. */ auto set(uint32_t const index, int32_t const position) -> void { m_nodes.at(index).set_position(position); From 15cb1b6be4006cdb81dfaff66eb2d8a6cceef5d1 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 21:49:38 -0500 Subject: [PATCH 117/144] Add and use node_id_t. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 16 ++++++++-------- .../finite_automata/RegisterHandler.hpp | 7 +++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 6a84bf2a..39e1c3da 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -13,6 +13,10 @@ namespace log_surgeon::finite_automata { * Each path from the root to an index represents a sequence of matched tag positions. */ class PrefixTree { +public: + using node_id_t = uint32_t; + +private: /** * Represents a prefix tree node. A node stores a potential value for a TDFA register. 
* @@ -24,11 +28,11 @@ class PrefixTree { */ class Node { public: - Node(std::optional const predecessor_index, int32_t const position) + Node(std::optional const predecessor_index, int32_t const position) : m_predecessor_index{predecessor_index}, m_position{position} {} - [[nodiscard]] auto get_predecessor_index() const -> std::optional { + [[nodiscard]] auto get_predecessor_index() const -> std::optional { return m_predecessor_index; } @@ -37,7 +41,7 @@ class PrefixTree { [[nodiscard]] auto get_position() const -> int32_t { return m_position; } private: - std::optional m_predecessor_index; + std::optional m_predecessor_index; int32_t m_position; }; @@ -50,7 +54,7 @@ class PrefixTree { * @return The index of the newly inserted node in the tree. * @throw std::out_of_range if the predecessor index is out of range. */ - auto insert(uint32_t const predecessor_index, int32_t const position) -> uint32_t { + auto insert(node_id_t const predecessor_index, int32_t const position) -> uint32_t { if (m_nodes.size() <= predecessor_index) { throw std::out_of_range("Predecessor index out of range."); } @@ -59,10 +63,6 @@ class PrefixTree { return m_nodes.size() - 1; } - /** - * @param index Index of the node to update. - * @param position New position value to set for the node. 
- */ auto set(uint32_t const index, int32_t const position) -> void { m_nodes.at(index).set_position(position); } diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index feb9f83b..8892b481 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -17,7 +17,10 @@ namespace log_surgeon::finite_automata { */ class RegisterHandler { public: - auto add_register(uint32_t const prefix_tree_parent_node_id, int32_t const position) -> void { + auto add_register( + PrefixTree::node_id_t const prefix_tree_parent_node_id, + int32_t const position + ) -> void { auto const prefix_tree_node_id{m_prefix_tree.insert(prefix_tree_parent_node_id, position)}; m_registers.emplace_back(prefix_tree_node_id); } @@ -41,7 +44,7 @@ class RegisterHandler { private: PrefixTree m_prefix_tree; - std::vector m_registers; + std::vector m_registers; }; } // namespace log_surgeon::finite_automata From 6b787d036f2055e1ba57cce3b837f9299952dc05 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 21:56:29 -0500 Subject: [PATCH 118/144] Add position_t. 
--- src/log_surgeon/finite_automata/PrefixTree.cpp | 8 +++----- src/log_surgeon/finite_automata/PrefixTree.hpp | 15 ++++++++------- .../finite_automata/RegisterHandler.hpp | 9 +++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index a57bac04..7f4f17e9 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -1,15 +1,13 @@ #include "PrefixTree.hpp" -#include - namespace log_surgeon::finite_automata { -[[nodiscard]] auto PrefixTree::get_reversed_positions(uint32_t const index -) const -> std::vector { +[[nodiscard]] auto PrefixTree::get_reversed_positions(node_id_t const index +) const -> std::vector { if (m_nodes.size() <= index) { throw std::out_of_range("Prefix tree index out of range."); } - std::vector reversed_positions; + std::vector reversed_positions; auto current_node{m_nodes[index]}; while (current_node.get_predecessor_index().has_value()) { reversed_positions.push_back(current_node.get_position()); diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 39e1c3da..3fdb3e6e 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -14,6 +14,7 @@ namespace log_surgeon::finite_automata { */ class PrefixTree { public: + using position_t = int32_t; using node_id_t = uint32_t; private: @@ -28,7 +29,7 @@ class PrefixTree { */ class Node { public: - Node(std::optional const predecessor_index, int32_t const position) + Node(std::optional const predecessor_index, position_t const position) : m_predecessor_index{predecessor_index}, m_position{position} {} @@ -36,13 +37,13 @@ class PrefixTree { return m_predecessor_index; } - auto set_position(int32_t const position) -> void { m_position = position; } + auto set_position(position_t const position) -> void { m_position = 
position; } - [[nodiscard]] auto get_position() const -> int32_t { return m_position; } + [[nodiscard]] auto get_position() const -> position_t { return m_position; } private: std::optional m_predecessor_index; - int32_t m_position; + position_t m_position; }; public: @@ -54,7 +55,7 @@ class PrefixTree { * @return The index of the newly inserted node in the tree. * @throw std::out_of_range if the predecessor index is out of range. */ - auto insert(node_id_t const predecessor_index, int32_t const position) -> uint32_t { + auto insert(node_id_t const predecessor_index, position_t const position) -> node_id_t { if (m_nodes.size() <= predecessor_index) { throw std::out_of_range("Predecessor index out of range."); } @@ -63,7 +64,7 @@ class PrefixTree { return m_nodes.size() - 1; } - auto set(uint32_t const index, int32_t const position) -> void { + auto set(node_id_t const index, position_t const position) -> void { m_nodes.at(index).set_position(position); } @@ -74,7 +75,7 @@ class PrefixTree { * @return A vector containing positions in reverse order from the given index to root. * @throw std::out_of_range if the index is out of range. 
*/ - [[nodiscard]] auto get_reversed_positions(uint32_t index) const -> std::vector; + [[nodiscard]] auto get_reversed_positions(node_id_t index) const -> std::vector; private: std::vector m_nodes; diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 8892b481..fa820897 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -19,13 +19,13 @@ class RegisterHandler { public: auto add_register( PrefixTree::node_id_t const prefix_tree_parent_node_id, - int32_t const position + PrefixTree::position_t const position ) -> void { auto const prefix_tree_node_id{m_prefix_tree.insert(prefix_tree_parent_node_id, position)}; m_registers.emplace_back(prefix_tree_node_id); } - auto set_register(uint32_t const reg_id, int32_t const position) -> void { + auto set_register(uint32_t const reg_id, PrefixTree::position_t const position) -> void { m_prefix_tree.set(m_registers.at(reg_id), position); } @@ -33,12 +33,13 @@ class RegisterHandler { m_registers.at(dest_reg_id) = m_registers.at(source_reg_id); } - auto append_position(uint32_t const reg_id, int32_t const position) -> void { + auto append_position(uint32_t const reg_id, PrefixTree::position_t const position) -> void { auto& reg{m_registers.at(reg_id)}; reg = m_prefix_tree.insert(reg, position); } - [[nodiscard]] auto get_reversed_positions(uint32_t const reg_id) const -> std::vector { + [[nodiscard]] auto get_reversed_positions(uint32_t const reg_id + ) const -> std::vector { return m_prefix_tree.get_reversed_positions(m_registers.at(reg_id)); } From cd8f4e3d8fc7febe79152661be1b563d1db4c60a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 2 Dec 2024 21:58:24 -0500 Subject: [PATCH 119/144] Change to id_t. 
--- src/log_surgeon/finite_automata/PrefixTree.cpp | 2 +- src/log_surgeon/finite_automata/PrefixTree.hpp | 14 +++++++------- .../finite_automata/RegisterHandler.hpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index 7f4f17e9..c51c7938 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -1,7 +1,7 @@ #include "PrefixTree.hpp" namespace log_surgeon::finite_automata { -[[nodiscard]] auto PrefixTree::get_reversed_positions(node_id_t const index +[[nodiscard]] auto PrefixTree::get_reversed_positions(id_t const index ) const -> std::vector { if (m_nodes.size() <= index) { throw std::out_of_range("Prefix tree index out of range."); diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 3fdb3e6e..60a21932 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -14,8 +14,8 @@ namespace log_surgeon::finite_automata { */ class PrefixTree { public: + using id_t = uint32_t; using position_t = int32_t; - using node_id_t = uint32_t; private: /** @@ -29,11 +29,11 @@ class PrefixTree { */ class Node { public: - Node(std::optional const predecessor_index, position_t const position) + Node(std::optional const predecessor_index, position_t const position) : m_predecessor_index{predecessor_index}, m_position{position} {} - [[nodiscard]] auto get_predecessor_index() const -> std::optional { + [[nodiscard]] auto get_predecessor_index() const -> std::optional { return m_predecessor_index; } @@ -42,7 +42,7 @@ class PrefixTree { [[nodiscard]] auto get_position() const -> position_t { return m_position; } private: - std::optional m_predecessor_index; + std::optional m_predecessor_index; position_t m_position; }; @@ -55,7 +55,7 @@ class PrefixTree { * @return The index of the newly inserted 
node in the tree. * @throw std::out_of_range if the predecessor index is out of range. */ - auto insert(node_id_t const predecessor_index, position_t const position) -> node_id_t { + auto insert(id_t const predecessor_index, position_t const position) -> id_t { if (m_nodes.size() <= predecessor_index) { throw std::out_of_range("Predecessor index out of range."); } @@ -64,7 +64,7 @@ class PrefixTree { return m_nodes.size() - 1; } - auto set(node_id_t const index, position_t const position) -> void { + auto set(id_t const index, position_t const position) -> void { m_nodes.at(index).set_position(position); } @@ -75,7 +75,7 @@ class PrefixTree { * @return A vector containing positions in reverse order from the given index to root. * @throw std::out_of_range if the index is out of range. */ - [[nodiscard]] auto get_reversed_positions(node_id_t index) const -> std::vector; + [[nodiscard]] auto get_reversed_positions(id_t index) const -> std::vector; private: std::vector m_nodes; diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index fa820897..3e85cd84 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -18,7 +18,7 @@ namespace log_surgeon::finite_automata { class RegisterHandler { public: auto add_register( - PrefixTree::node_id_t const prefix_tree_parent_node_id, + PrefixTree::id_t const prefix_tree_parent_node_id, PrefixTree::position_t const position ) -> void { auto const prefix_tree_node_id{m_prefix_tree.insert(prefix_tree_parent_node_id, position)}; @@ -45,7 +45,7 @@ class RegisterHandler { private: PrefixTree m_prefix_tree; - std::vector m_registers; + std::vector m_registers; }; } // namespace log_surgeon::finite_automata From 72da50c4b58105fa440f548fdad289fd1d36868e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 06:18:42 -0500 Subject: [PATCH 120/144] Add is_root(). 
--- src/log_surgeon/finite_automata/PrefixTree.cpp | 2 +- src/log_surgeon/finite_automata/PrefixTree.hpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index c51c7938..18edbdac 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -9,7 +9,7 @@ namespace log_surgeon::finite_automata { std::vector reversed_positions; auto current_node{m_nodes[index]}; - while (current_node.get_predecessor_index().has_value()) { + while (false == current_node.is_root()) { reversed_positions.push_back(current_node.get_position()); current_node = m_nodes[current_node.get_predecessor_index().value()]; } diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 60a21932..57350550 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -33,6 +33,10 @@ class PrefixTree { : m_predecessor_index{predecessor_index}, m_position{position} {} + [[nodiscard]] auto is_root() const -> bool { + return false == m_predecessor_index.has_value(); + } + [[nodiscard]] auto get_predecessor_index() const -> std::optional { return m_predecessor_index; } From 3fc7ea776bd9aa4275fa1079f8dc7c12fe4f4b2b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 06:20:23 -0500 Subject: [PATCH 121/144] Add missing header. 
--- src/log_surgeon/finite_automata/PrefixTree.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index 18edbdac..9d3dfb15 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -1,5 +1,7 @@ #include "PrefixTree.hpp" +#include + namespace log_surgeon::finite_automata { [[nodiscard]] auto PrefixTree::get_reversed_positions(id_t const index ) const -> std::vector { From 6443d6619d569488ee32182a0e76bc63b2638339 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 06:41:38 -0500 Subject: [PATCH 122/144] Update PrefixTree docstring. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 57350550..c67dbb58 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -8,9 +8,13 @@ namespace log_surgeon::finite_automata { /** - * Represents a prefix tree that stores all data needed by the TDFA registers. - * - * Each path from the root to an index represents a sequence of matched tag positions. + * Represents a prefix tree to store register data during TDFA simulation. Each path from the root + * to an index corresponds to a sequence of positions for an individual tag: + * - Positive position node: Indicates the tag was matched at the position. + * - Negative position node: Indicates the tag was unmatched. If a negative node is the entire path, + * it indicates the tag was never matched. If the negative tag is along a path containing positive + * nodes, it functions as a placeholder. This can be useful for nested capture groups, to maintain a + * one-to-one mapping between the contained capture group and the enclosing capture group. 
*/ class PrefixTree { public: From 63aec4d3c8307a6de5d08d0167bdd8f310f5d3bc Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 06:45:32 -0500 Subject: [PATCH 123/144] Removing node docstring as its redundant. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index c67dbb58..ed6ebd6b 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -8,8 +8,9 @@ namespace log_surgeon::finite_automata { /** - * Represents a prefix tree to store register data during TDFA simulation. Each path from the root - * to an index corresponds to a sequence of positions for an individual tag: + * Represents a prefix tree to store register data during TDFA simulation. Each node in the tree + * stores a single posiiton in the lexed string. Each path from the root to an index corresponds to + * a sequence of positions for an individual tag: * - Positive position node: Indicates the tag was matched at the position. * - Negative position node: Indicates the tag was unmatched. If a negative node is the entire path, * it indicates the tag was never matched. If the negative tag is along a path containing positive @@ -22,15 +23,6 @@ class PrefixTree { using position_t = int32_t; private: - /** - * Represents a prefix tree node. A node stores a potential value for a TDFA register. - * - * A node stores the current position at which a tag was matched, as well as the index of the - * prefix tree node corresponding to the previous match of the same tag. - * - * Note: A value of m_position < 0 indicates that the tag is currently unmatched in the lexed - * string. 
- */ class Node { public: Node(std::optional const predecessor_index, position_t const position) From 295f3eed024d3c3888504acc3d06c261a77a309b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 06:47:14 -0500 Subject: [PATCH 124/144] Combine private section in PrefixTree. --- .../finite_automata/PrefixTree.hpp | 48 +++++++++---------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index ed6ebd6b..04392238 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -22,31 +22,6 @@ class PrefixTree { using id_t = uint32_t; using position_t = int32_t; -private: - class Node { - public: - Node(std::optional const predecessor_index, position_t const position) - : m_predecessor_index{predecessor_index}, - m_position{position} {} - - [[nodiscard]] auto is_root() const -> bool { - return false == m_predecessor_index.has_value(); - } - - [[nodiscard]] auto get_predecessor_index() const -> std::optional { - return m_predecessor_index; - } - - auto set_position(position_t const position) -> void { m_position = position; } - - [[nodiscard]] auto get_position() const -> position_t { return m_position; } - - private: - std::optional m_predecessor_index; - position_t m_position; - }; - -public: PrefixTree() : m_nodes{{std::nullopt, -1}} {} /** @@ -78,6 +53,29 @@ class PrefixTree { [[nodiscard]] auto get_reversed_positions(id_t index) const -> std::vector; private: + class Node { + public: + Node(std::optional const predecessor_index, position_t const position) + : m_predecessor_index{predecessor_index}, + m_position{position} {} + + [[nodiscard]] auto is_root() const -> bool { + return false == m_predecessor_index.has_value(); + } + + [[nodiscard]] auto get_predecessor_index() const -> std::optional { + return m_predecessor_index; + } + + auto set_position(position_t const position) -> void { 
m_position = position; } + + [[nodiscard]] auto get_position() const -> position_t { return m_position; } + + private: + std::optional m_predecessor_index; + position_t m_position; + }; + std::vector m_nodes; }; From 11866669fcfb1f0d97f1eb3640001a2cff62a9c8 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 06:54:35 -0500 Subject: [PATCH 125/144] Add missing header; Remove copy paste error. --- src/log_surgeon/finite_automata/PrefixTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index 9d3dfb15..08b71982 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -1,9 +1,10 @@ #include "PrefixTree.hpp" #include +#include namespace log_surgeon::finite_automata { -[[nodiscard]] auto PrefixTree::get_reversed_positions(id_t const index +auto PrefixTree::get_reversed_positions(id_t const index ) const -> std::vector { if (m_nodes.size() <= index) { throw std::out_of_range("Prefix tree index out of range."); From 06ee38e7ebafa072b0bb9fc94128786ffc8462eb Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:03:30 -0500 Subject: [PATCH 126/144] Rename to node_id and parent_node_id. 
--- .../finite_automata/PrefixTree.cpp | 9 ++-- .../finite_automata/PrefixTree.hpp | 32 ++++++----- tests/test-prefix-tree.cpp | 54 +++++++++---------- 3 files changed, 46 insertions(+), 49 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp index 08b71982..bf0705c8 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.cpp +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -4,17 +4,16 @@ #include namespace log_surgeon::finite_automata { -auto PrefixTree::get_reversed_positions(id_t const index -) const -> std::vector { - if (m_nodes.size() <= index) { +auto PrefixTree::get_reversed_positions(id_t const node_id) const -> std::vector { + if (m_nodes.size() <= node_id) { throw std::out_of_range("Prefix tree index out of range."); } std::vector reversed_positions; - auto current_node{m_nodes[index]}; + auto current_node{m_nodes[node_id]}; while (false == current_node.is_root()) { reversed_positions.push_back(current_node.get_position()); - current_node = m_nodes[current_node.get_predecessor_index().value()]; + current_node = m_nodes[current_node.get_parent_node_id().value()]; } return reversed_positions; } diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 04392238..53e72037 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -25,46 +25,44 @@ class PrefixTree { PrefixTree() : m_nodes{{std::nullopt, -1}} {} /** - * @param predecessor_index Index of the inserted node's predecessor in the prefix tree. + * @param parent_node_id Index of the inserted node's parent in the prefix tree. * @param position The position in the lexed string. * @return The index of the newly inserted node in the tree. - * @throw std::out_of_range if the predecessor index is out of range. + * @throw std::out_of_range if the parent's index is out of range. 
*/ - auto insert(id_t const predecessor_index, position_t const position) -> id_t { - if (m_nodes.size() <= predecessor_index) { + auto insert(id_t const parent_node_id, position_t const position) -> id_t { + if (m_nodes.size() <= parent_node_id) { throw std::out_of_range("Predecessor index out of range."); } - m_nodes.emplace_back(predecessor_index, position); + m_nodes.emplace_back(parent_node_id, position); return m_nodes.size() - 1; } - auto set(id_t const index, position_t const position) -> void { - m_nodes.at(index).set_position(position); + auto set(id_t const node_id, position_t const position) -> void { + m_nodes.at(node_id).set_position(position); } /** * Retrieves a vector of positions in reverse order by traversing from the given index to the * root. - * @param index The index of the node to start the traversal from. + * @param node_id The index of the node to start the traversal from. * @return A vector containing positions in reverse order from the given index to root. * @throw std::out_of_range if the index is out of range. 
*/ - [[nodiscard]] auto get_reversed_positions(id_t index) const -> std::vector; + [[nodiscard]] auto get_reversed_positions(id_t node_id) const -> std::vector; private: class Node { public: - Node(std::optional const predecessor_index, position_t const position) - : m_predecessor_index{predecessor_index}, + Node(std::optional const parent_node_id, position_t const position) + : m_parent_node_id{parent_node_id}, m_position{position} {} - [[nodiscard]] auto is_root() const -> bool { - return false == m_predecessor_index.has_value(); - } + [[nodiscard]] auto is_root() const -> bool { return false == m_parent_node_id.has_value(); } - [[nodiscard]] auto get_predecessor_index() const -> std::optional { - return m_predecessor_index; + [[nodiscard]] auto get_parent_node_id() const -> std::optional { + return m_parent_node_id; } auto set_position(position_t const position) -> void { m_position = position; } @@ -72,7 +70,7 @@ class PrefixTree { [[nodiscard]] auto get_position() const -> position_t { return m_position; } private: - std::optional m_predecessor_index; + std::optional m_parent_node_id; position_t m_position; }; diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 22ad8029..ec19c156 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -18,22 +18,22 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { PrefixTree tree; // Test basic insertions - auto const index_1{tree.insert(0, 4)}; - auto const index_2{tree.insert(index_1, 7)}; - auto const index_3{tree.insert(index_2, 9)}; - REQUIRE(std::vector{4} == tree.get_reversed_positions(index_1)); - REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(index_2)); - REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(index_3)); + auto const node_id_1{tree.insert(0, 4)}; + auto const node_id_2{tree.insert(node_id_1, 7)}; + auto const node_id_3{tree.insert(node_id_2, 9)}; + REQUIRE(std::vector{4} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{7, 
4} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(node_id_3)); // Test insertion with large position values - auto const index_4{tree.insert(0, std::numeric_limits::max())}; - REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(index_4)[0]); + auto const node_id_4{tree.insert(0, std::numeric_limits::max())}; + REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(node_id_4)[0]); // Test insertion with negative position values - auto const index_5{tree.insert(0, -1)}; - auto const index_6{tree.insert(index_5, -100)}; - REQUIRE(std::vector{-1} == tree.get_reversed_positions(index_5)); - REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(index_6)); + auto const node_id_5{tree.insert(0, -1)}; + auto const node_id_6{tree.insert(node_id_5, -100)}; + REQUIRE(std::vector{-1} == tree.get_reversed_positions(node_id_5)); + REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(node_id_6)); } SECTION("Invalid index access throws correctly") { @@ -56,23 +56,23 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { tree.set(0, 10); // Test updates to different nodes - auto const index_1{tree.insert(0, 4)}; - auto const index_2{tree.insert(index_1, 7)}; - tree.set(index_1, 10); - tree.set(index_2, 12); - REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); - REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(index_2)); + auto const node_id_1{tree.insert(0, 4)}; + auto const node_id_2{tree.insert(node_id_1, 7)}; + tree.set(node_id_1, 10); + tree.set(node_id_2, 12); + REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(node_id_2)); // Test multiple updates to the same node - tree.set(index_2, 15); - tree.set(index_2, 20); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); + tree.set(node_id_2, 15); + tree.set(node_id_2, 20); + REQUIRE(std::vector{20, 10} == 
tree.get_reversed_positions(node_id_2)); // Test that updates don't affect unrelated paths - auto const index_3{tree.insert(0, 30)}; - tree.set(index_3, 25); - REQUIRE(std::vector{10} == tree.get_reversed_positions(index_1)); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(index_2)); + auto const node_id_3{tree.insert(0, 30)}; + tree.set(node_id_3, 25); + REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); } SECTION("Set position for an invalid index throws correctly") { @@ -82,7 +82,7 @@ TEST_CASE("Prefix tree operations", "[PrefixTree]") { REQUIRE_THROWS_AS(tree.set(100, 20), std::out_of_range); // Test setting position just beyond valid range - auto const index_1{tree.insert(0, 4)}; - REQUIRE_THROWS_AS(tree.set(index_1 + 1, 20), std::out_of_range); + auto const node_id_1{tree.insert(0, 4)}; + REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, 20), std::out_of_range); } } From e103011e1cd46382815a756e5fa2da21c98fd5ea Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:04:29 -0500 Subject: [PATCH 127/144] Update get_reversed_positions' docstring. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 53e72037..47de73e4 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -44,8 +44,6 @@ class PrefixTree { } /** - * Retrieves a vector of positions in reverse order by traversing from the given index to the - * root. * @param node_id The index of the node to start the traversal from. * @return A vector containing positions in reverse order from the given index to root. * @throw std::out_of_range if the index is out of range. 
From 31b03465af5d4cca352e21344e1fb4669ce2d230 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:11:08 -0500 Subject: [PATCH 128/144] Update get_reversed positions' docstring to clarify exlcusivity of the root. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 47de73e4..47c7bdb8 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -44,8 +44,9 @@ class PrefixTree { } /** - * @param node_id The index of the node to start the traversal from. - * @return A vector containing positions in reverse order from the given index to root. + * @param node_id The index of the node. + * @return A vector containing positions in the path defined by `node_id`, in reverse order, + * i.e., [index, root). * @throw std::out_of_range if the index is out of range. */ [[nodiscard]] auto get_reversed_positions(id_t node_id) const -> std::vector; From 4005e41c27074cb233a7ba7a335c9544afbb86e1 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:11:57 -0500 Subject: [PATCH 129/144] Grammar fix. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 47c7bdb8..79e8cd17 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -45,7 +45,7 @@ class PrefixTree { /** * @param node_id The index of the node. - * @return A vector containing positions in the path defined by `node_id`, in reverse order, + * @return A vector containing positions along the path defined by `node_id`, in reverse order, * i.e., [index, root). * @throw std::out_of_range if the index is out of range. 
*/ From e38940c3b38e3bc4422485adbb8805aad3f2c050 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:15:17 -0500 Subject: [PATCH 130/144] Add maybe_unusued. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 79e8cd17..d6f74eef 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -30,7 +30,7 @@ class PrefixTree { * @return The index of the newly inserted node in the tree. * @throw std::out_of_range if the parent's index is out of range. */ - auto insert(id_t const parent_node_id, position_t const position) -> id_t { + [[maybe_unused]] auto insert(id_t const parent_node_id, position_t const position) -> id_t { if (m_nodes.size() <= parent_node_id) { throw std::out_of_range("Predecessor index out of range."); } From d71368d36c5b1d7160a1e27ef3a129e2165e4545 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Wed, 4 Dec 2024 07:21:37 -0500 Subject: [PATCH 131/144] Update src/log_surgeon/finite_automata/RegisterHandler.hpp Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 3e85cd84..3c61bdc5 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -34,8 +34,8 @@ class RegisterHandler { } auto append_position(uint32_t const reg_id, PrefixTree::position_t const position) -> void { - auto& reg{m_registers.at(reg_id)}; - reg = m_prefix_tree.insert(reg, position); + auto const node_id{m_registers.at(reg_id)}; + m_registers.at(reg_id) = m_prefix_tree.insert(node_id, position); } [[nodiscard]] auto 
get_reversed_positions(uint32_t const reg_id From dd4b6e1a2a644b624928e4b4358fba05ae31b11f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:23:34 -0500 Subject: [PATCH 132/144] Update test case names to document code names better. --- tests/test-prefix-tree.cpp | 2 +- tests/test-register-handler.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index ec19c156..153c077b 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -6,7 +6,7 @@ using log_surgeon::finite_automata::PrefixTree; -TEST_CASE("Prefix tree operations", "[PrefixTree]") { +TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { SECTION("Newly constructed tree works correctly") { PrefixTree const tree; diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 67fa3d95..8c470d98 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -8,7 +8,7 @@ using log_surgeon::finite_automata::RegisterHandler; using std::make_unique; using std::unique_ptr; -TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { +TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { RegisterHandler handler; SECTION("Initial state is empty") { @@ -40,7 +40,7 @@ TEST_CASE("RegisterHandler tests", "[RegisterHandler]") { REQUIRE(std::vector{5} == handler.get_reversed_positions(1)); } - SECTION("append_position appends position correctly") { + SECTION("`append_position` appends position correctly") { handler.set_register(0, 5); handler.append_position(0, 7); REQUIRE(std::vector{7, 5} == handler.get_reversed_positions(0)); From 7322852be0525d9284f2e2d4c41f86b210b1531e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:32:58 -0500 Subject: [PATCH 133/144] Implicitily use auto in vectors. 
--- tests/test-prefix-tree.cpp | 29 ++++++++++++++++------------- tests/test-register-handler.cpp | 8 ++++---- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 153c077b..3f16b5fd 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -5,6 +5,8 @@ #include using log_surgeon::finite_automata::PrefixTree; +using id_t = PrefixTree::id_t; +using position_t = PrefixTree::position_t; TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { SECTION("Newly constructed tree works correctly") { @@ -21,19 +23,20 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { auto const node_id_1{tree.insert(0, 4)}; auto const node_id_2{tree.insert(node_id_1, 7)}; auto const node_id_3{tree.insert(node_id_2, 9)}; - REQUIRE(std::vector{4} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(node_id_2)); - REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(node_id_3)); + REQUIRE(std::vector{4} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(node_id_3)); // Test insertion with large position values - auto const node_id_4{tree.insert(0, std::numeric_limits::max())}; - REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(node_id_4)[0]); + auto const node_id_4{tree.insert(0, std::numeric_limits::max())}; + REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(node_id_4)[0] + ); // Test insertion with negative position values auto const node_id_5{tree.insert(0, -1)}; auto const node_id_6{tree.insert(node_id_5, -100)}; - REQUIRE(std::vector{-1} == tree.get_reversed_positions(node_id_5)); - REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(node_id_6)); + REQUIRE(std::vector{-1} == tree.get_reversed_positions(node_id_5)); + REQUIRE(std::vector{-100, -1} == 
tree.get_reversed_positions(node_id_6)); } SECTION("Invalid index access throws correctly") { @@ -45,7 +48,7 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { REQUIRE_THROWS_AS(tree.get_reversed_positions(3), std::out_of_range); REQUIRE_THROWS_AS( - tree.get_reversed_positions(std::numeric_limits::max()), + tree.get_reversed_positions(std::numeric_limits::max()), std::out_of_range ); } @@ -60,19 +63,19 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { auto const node_id_2{tree.insert(node_id_1, 7)}; tree.set(node_id_1, 10); tree.set(node_id_2, 12); - REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(node_id_2)); // Test multiple updates to the same node tree.set(node_id_2, 15); tree.set(node_id_2, 20); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); // Test that updates don't affect unrelated paths auto const node_id_3{tree.insert(0, 30)}; tree.set(node_id_3, 25); - REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); } SECTION("Set position for an invalid index throws correctly") { diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 8c470d98..3ba5de4d 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -22,7 +22,7 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { SECTION("Set register position correctly") { handler.set_register(0, 5); - REQUIRE(std::vector{5} == handler.get_reversed_positions(0)); + REQUIRE(std::vector{5} == 
handler.get_reversed_positions(0)); } SECTION("Register relationships are maintained") { @@ -31,19 +31,19 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { handler.set_register(2, 15); auto positions{handler.get_reversed_positions(2)}; - REQUIRE(std::vector{15, 10, 5} == handler.get_reversed_positions(2)); + REQUIRE(std::vector{15, 10, 5} == handler.get_reversed_positions(2)); } SECTION("Copy register index correctly") { handler.set_register(0, 5); handler.copy_register(1, 0); - REQUIRE(std::vector{5} == handler.get_reversed_positions(1)); + REQUIRE(std::vector{5} == handler.get_reversed_positions(1)); } SECTION("`append_position` appends position correctly") { handler.set_register(0, 5); handler.append_position(0, 7); - REQUIRE(std::vector{7, 5} == handler.get_reversed_positions(0)); + REQUIRE(std::vector{7, 5} == handler.get_reversed_positions(0)); } SECTION("Throws out of range correctly") { From dba1a183aae90eaf4e557f37e1efab10fd70ed5a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:36:42 -0500 Subject: [PATCH 134/144] Explicitily use position_t for vectors. 
--- tests/test-prefix-tree.cpp | 20 ++++++++++---------- tests/test-register-handler.cpp | 10 ++++++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 3f16b5fd..e11b749e 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -23,9 +23,9 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { auto const node_id_1{tree.insert(0, 4)}; auto const node_id_2{tree.insert(node_id_1, 7)}; auto const node_id_3{tree.insert(node_id_2, 9)}; - REQUIRE(std::vector{4} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(node_id_2)); - REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(node_id_3)); + REQUIRE(std::vector{4} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(node_id_3)); // Test insertion with large position values auto const node_id_4{tree.insert(0, std::numeric_limits::max())}; @@ -35,8 +35,8 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { // Test insertion with negative position values auto const node_id_5{tree.insert(0, -1)}; auto const node_id_6{tree.insert(node_id_5, -100)}; - REQUIRE(std::vector{-1} == tree.get_reversed_positions(node_id_5)); - REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(node_id_6)); + REQUIRE(std::vector{-1} == tree.get_reversed_positions(node_id_5)); + REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(node_id_6)); } SECTION("Invalid index access throws correctly") { @@ -63,19 +63,19 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { auto const node_id_2{tree.insert(node_id_1, 7)}; tree.set(node_id_1, 10); tree.set(node_id_2, 12); - REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{10} == 
tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(node_id_2)); // Test multiple updates to the same node tree.set(node_id_2, 15); tree.set(node_id_2, 20); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); // Test that updates don't affect unrelated paths auto const node_id_3{tree.insert(0, 30)}; tree.set(node_id_3, 25); - REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); } SECTION("Set position for an invalid index throws correctly") { diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 3ba5de4d..815e6f2f 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -2,10 +2,12 @@ #include +#include #include using log_surgeon::finite_automata::RegisterHandler; using std::make_unique; +using position_t = log_surgeon::finite_automata::PrefixTree::position_t; using std::unique_ptr; TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { @@ -22,7 +24,7 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { SECTION("Set register position correctly") { handler.set_register(0, 5); - REQUIRE(std::vector{5} == handler.get_reversed_positions(0)); + REQUIRE(std::vector{5} == handler.get_reversed_positions(0)); } SECTION("Register relationships are maintained") { @@ -31,19 +33,19 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { handler.set_register(2, 15); auto positions{handler.get_reversed_positions(2)}; - REQUIRE(std::vector{15, 10, 5} == handler.get_reversed_positions(2)); + REQUIRE(std::vector{15, 10, 5} == handler.get_reversed_positions(2)); } SECTION("Copy register index correctly") { handler.set_register(0, 
5); handler.copy_register(1, 0); - REQUIRE(std::vector{5} == handler.get_reversed_positions(1)); + REQUIRE(std::vector{5} == handler.get_reversed_positions(1)); } SECTION("`append_position` appends position correctly") { handler.set_register(0, 5); handler.append_position(0, 7); - REQUIRE(std::vector{7, 5} == handler.get_reversed_positions(0)); + REQUIRE(std::vector{7, 5} == handler.get_reversed_positions(0)); } SECTION("Throws out of range correctly") { From ee6efab78936cc32722f488724bab93478cd8c03 Mon Sep 17 00:00:00 2001 From: Sharaf Mohamed Date: Wed, 4 Dec 2024 07:37:12 -0500 Subject: [PATCH 135/144] Update tests/test-register-handler.cpp Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- tests/test-register-handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 815e6f2f..8da726ca 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -18,7 +18,7 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { } constexpr uint32_t num_registers{5}; - for (uint32_t i = 0; i < num_registers; i++) { + for (uint32_t i{0}; i < num_registers; ++i) { handler.add_register(i, 0); } From 9ba980cc4492b7f3bba0667caea4667514d538be Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 07:40:49 -0500 Subject: [PATCH 136/144] Switch to size_t. 
--- src/log_surgeon/finite_automata/RegisterHandler.hpp | 8 ++++---- tests/test-register-handler.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 3c61bdc5..6690fef0 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -25,20 +25,20 @@ class RegisterHandler { m_registers.emplace_back(prefix_tree_node_id); } - auto set_register(uint32_t const reg_id, PrefixTree::position_t const position) -> void { + auto set_register(size_t const reg_id, PrefixTree::position_t const position) -> void { m_prefix_tree.set(m_registers.at(reg_id), position); } - auto copy_register(uint32_t const dest_reg_id, uint32_t const source_reg_id) -> void { + auto copy_register(size_t const dest_reg_id, size_t const source_reg_id) -> void { m_registers.at(dest_reg_id) = m_registers.at(source_reg_id); } - auto append_position(uint32_t const reg_id, PrefixTree::position_t const position) -> void { + auto append_position(size_t const reg_id, PrefixTree::position_t const position) -> void { auto const node_id{m_registers.at(reg_id)}; m_registers.at(reg_id) = m_prefix_tree.insert(node_id, position); } - [[nodiscard]] auto get_reversed_positions(uint32_t const reg_id + [[nodiscard]] auto get_reversed_positions(size_t const reg_id ) const -> std::vector { return m_prefix_tree.get_reversed_positions(m_registers.at(reg_id)); } diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 8da726ca..7c413890 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -18,7 +18,7 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { } constexpr uint32_t num_registers{5}; - for (uint32_t i{0}; i < num_registers; ++i) { + for (size_t i{0}; i < num_registers; ++i) { handler.add_register(i, 0); } From 27b324c762a0355bbac94a806f7c4e5147e29b83 Mon Sep 
17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 09:21:06 -0500 Subject: [PATCH 137/144] Clang-tidy: Remove magic numbers + Fix headers. --- .../finite_automata/RegisterHandler.hpp | 2 +- tests/test-prefix-tree.cpp | 81 ++++++++++++------- tests/test-register-handler.cpp | 61 ++++++++------ 3 files changed, 87 insertions(+), 57 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 6690fef0..2c245907 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -1,7 +1,7 @@ #ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP #define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP -#include +#include #include #include diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index e11b749e..988961df 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -1,4 +1,6 @@ -#include +#include +#include +#include #include @@ -8,6 +10,18 @@ using log_surgeon::finite_automata::PrefixTree; using id_t = PrefixTree::id_t; using position_t = PrefixTree::position_t; +constexpr id_t cInvaidNodeId{100}; +constexpr position_t cInsertPos1{4}; +constexpr position_t cInsertPos2{7}; +constexpr position_t cInsertPos3{9}; +constexpr position_t cMaxPos{std::numeric_limits::max()}; +constexpr position_t cNegativePos1{-1}; +constexpr position_t cNegativePos2{-100}; +constexpr position_t cSetPos1{10}; +constexpr position_t cSetPos2{12}; +constexpr position_t cSetPos3{15}; +constexpr position_t cSetPos4{20}; + TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { SECTION("Newly constructed tree works correctly") { PrefixTree const tree; @@ -20,23 +34,25 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { PrefixTree tree; // Test basic insertions - auto const node_id_1{tree.insert(0, 4)}; - auto const node_id_2{tree.insert(node_id_1, 7)}; - auto const node_id_3{tree.insert(node_id_2, 9)}; - 
REQUIRE(std::vector{4} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{7, 4} == tree.get_reversed_positions(node_id_2)); - REQUIRE(std::vector{9, 7, 4} == tree.get_reversed_positions(node_id_3)); + auto const node_id_1{tree.insert(0, cInsertPos1)}; + auto const node_id_2{tree.insert(node_id_1, cInsertPos2)}; + auto const node_id_3{tree.insert(node_id_2, cInsertPos3)}; + REQUIRE(std::vector{cInsertPos1} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{cInsertPos2, cInsertPos1} + == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{cInsertPos3, cInsertPos2, cInsertPos1} + == tree.get_reversed_positions(node_id_3)); // Test insertion with large position values - auto const node_id_4{tree.insert(0, std::numeric_limits::max())}; - REQUIRE(std::numeric_limits::max() == tree.get_reversed_positions(node_id_4)[0] - ); + auto const node_id_4{tree.insert(0, cMaxPos)}; + REQUIRE(cMaxPos == tree.get_reversed_positions(node_id_4)[0]); // Test insertion with negative position values - auto const node_id_5{tree.insert(0, -1)}; - auto const node_id_6{tree.insert(node_id_5, -100)}; - REQUIRE(std::vector{-1} == tree.get_reversed_positions(node_id_5)); - REQUIRE(std::vector{-100, -1} == tree.get_reversed_positions(node_id_6)); + auto const node_id_5{tree.insert(0, cNegativePos1)}; + auto const node_id_6{tree.insert(node_id_5, cNegativePos2)}; + REQUIRE(std::vector{cNegativePos1} == tree.get_reversed_positions(node_id_5)); + REQUIRE(std::vector{cNegativePos2, cNegativePos1} + == tree.get_reversed_positions(node_id_6)); } SECTION("Invalid index access throws correctly") { @@ -56,36 +72,39 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { SECTION("Set position for a valid index works correctly") { PrefixTree tree; // Test that you can set the root node for sanity, although this value is not used - tree.set(0, 10); + tree.set(0, cSetPos1); // Test updates to different nodes - auto const node_id_1{tree.insert(0, 4)}; - auto const 
node_id_2{tree.insert(node_id_1, 7)}; - tree.set(node_id_1, 10); - tree.set(node_id_2, 12); - REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{12, 10} == tree.get_reversed_positions(node_id_2)); + auto const node_id_1{tree.insert(0, cInsertPos1)}; + auto const node_id_2{tree.insert(node_id_1, cInsertPos1)}; + tree.set(node_id_1, cSetPos1); + tree.set(node_id_2, cSetPos2); + REQUIRE(std::vector{cSetPos1} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{cSetPos2, cSetPos1} + == tree.get_reversed_positions(node_id_2)); // Test multiple updates to the same node - tree.set(node_id_2, 15); - tree.set(node_id_2, 20); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); + tree.set(node_id_2, cSetPos3); + tree.set(node_id_2, cSetPos4); + REQUIRE(std::vector{cSetPos4, cSetPos1} + == tree.get_reversed_positions(node_id_2)); // Test that updates don't affect unrelated paths - auto const node_id_3{tree.insert(0, 30)}; - tree.set(node_id_3, 25); - REQUIRE(std::vector{10} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{20, 10} == tree.get_reversed_positions(node_id_2)); + auto const node_id_3{tree.insert(0, cSetPos2)}; + tree.set(node_id_3, cSetPos3); + REQUIRE(std::vector{cSetPos1} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{cSetPos4, cSetPos1} + == tree.get_reversed_positions(node_id_2)); } SECTION("Set position for an invalid index throws correctly") { PrefixTree tree; // Test setting position before any insertions - REQUIRE_THROWS_AS(tree.set(100, 20), std::out_of_range); + REQUIRE_THROWS_AS(tree.set(cInvaidNodeId, cSetPos4), std::out_of_range); // Test setting position just beyond valid range - auto const node_id_1{tree.insert(0, 4)}; - REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, 20), std::out_of_range); + auto const node_id_1{tree.insert(0, cInsertPos1)}; + REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, cSetPos4), std::out_of_range); } } diff --git 
a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 7c413890..4f741961 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -1,4 +1,6 @@ -#include +#include +#include +#include #include @@ -6,53 +8,62 @@ #include using log_surgeon::finite_automata::RegisterHandler; -using std::make_unique; using position_t = log_surgeon::finite_automata::PrefixTree::position_t; -using std::unique_ptr; + +constexpr position_t cInitialPos{0}; +constexpr position_t cSetPos1{5}; +constexpr position_t cSetPos2{10}; +constexpr position_t cSetPos3{15}; +constexpr size_t cNumRegisters{5}; +constexpr size_t cRegId1{0}; +constexpr size_t cRegId2{1}; +constexpr size_t cRegId3{2}; +constexpr size_t cInvalidRegId{10}; TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { RegisterHandler handler; SECTION("Initial state is empty") { - REQUIRE_THROWS_AS(handler.get_reversed_positions(0), std::out_of_range); + REQUIRE_THROWS_AS(handler.get_reversed_positions(cRegId1), std::out_of_range); } - constexpr uint32_t num_registers{5}; - for (size_t i{0}; i < num_registers; ++i) { - handler.add_register(i, 0); + for (size_t i{0}; i < cNumRegisters; ++i) { + handler.add_register(i, cInitialPos); } SECTION("Set register position correctly") { - handler.set_register(0, 5); - REQUIRE(std::vector{5} == handler.get_reversed_positions(0)); + handler.set_register(cRegId1, cSetPos1); + REQUIRE(std::vector{cSetPos1} == handler.get_reversed_positions(cRegId1)); } SECTION("Register relationships are maintained") { - handler.set_register(0, 5); - handler.set_register(1, 10); - handler.set_register(2, 15); + handler.set_register(cRegId1, cSetPos1); + handler.set_register(cRegId2, cSetPos2); + handler.set_register(cRegId3, cSetPos3); - auto positions{handler.get_reversed_positions(2)}; - REQUIRE(std::vector{15, 10, 5} == handler.get_reversed_positions(2)); + auto positions{handler.get_reversed_positions(cRegId3)}; + REQUIRE(std::vector{cSetPos3, cSetPos2, 
cSetPos1} + == handler.get_reversed_positions(cRegId3)); } SECTION("Copy register index correctly") { - handler.set_register(0, 5); - handler.copy_register(1, 0); - REQUIRE(std::vector{5} == handler.get_reversed_positions(1)); + handler.set_register(cRegId1, cSetPos1); + handler.copy_register(cRegId2, cRegId1); + REQUIRE(std::vector{cSetPos1} == handler.get_reversed_positions(cRegId2)); } SECTION("`append_position` appends position correctly") { - handler.set_register(0, 5); - handler.append_position(0, 7); - REQUIRE(std::vector{7, 5} == handler.get_reversed_positions(0)); + handler.set_register(cRegId1, cSetPos1); + handler.append_position(cRegId1, cSetPos2); + REQUIRE(std::vector{cSetPos2, cSetPos1} + == handler.get_reversed_positions(cRegId1)); } SECTION("Throws out of range correctly") { - REQUIRE_THROWS_AS(handler.set_register(10, 5), std::out_of_range); - REQUIRE_THROWS_AS(handler.copy_register(10, 1), std::out_of_range); - REQUIRE_THROWS_AS(handler.copy_register(0, 10), std::out_of_range); - REQUIRE_THROWS_AS(handler.append_position(10, 5), std::out_of_range); - REQUIRE_THROWS_AS(handler.get_reversed_positions(10), std::out_of_range); + REQUIRE_THROWS_AS(handler.set_register(cInvalidRegId, cSetPos1), std::out_of_range); + REQUIRE_THROWS_AS(handler.copy_register(cInvalidRegId, cRegId2), std::out_of_range); + REQUIRE_THROWS_AS(handler.copy_register(cRegId1, cInvalidRegId), std::out_of_range); + REQUIRE_THROWS_AS(handler.append_position(cInvalidRegId, cSetPos1), std::out_of_range); + REQUIRE_THROWS_AS(handler.get_reversed_positions(cInvalidRegId), std::out_of_range); } } From f651a24f677246a56c3262eff5f5b71a5af02604 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 09:32:51 -0500 Subject: [PATCH 138/144] Reduce complexity for clang-tidy. 
--- tests/test-register-handler.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 4f741961..4ec3ccf1 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -20,6 +20,14 @@ constexpr size_t cRegId2{1}; constexpr size_t cRegId3{2}; constexpr size_t cInvalidRegId{10}; +namespace { +auto add_register_to_handler(RegisterHandler& handler) -> void { + for (size_t i{0}; i < cNumRegisters; ++i) { + handler.add_register(i, 0); + } +} +} // namespace + TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { RegisterHandler handler; @@ -27,9 +35,7 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { REQUIRE_THROWS_AS(handler.get_reversed_positions(cRegId1), std::out_of_range); } - for (size_t i{0}; i < cNumRegisters; ++i) { - handler.add_register(i, cInitialPos); - } + add_register_to_handler(handler); SECTION("Set register position correctly") { handler.set_register(cRegId1, cSetPos1); From fc6f4262a9bec09a98da6e89a619c8082aa8164f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 09:37:07 -0500 Subject: [PATCH 139/144] Add negative pos test case in test-register-handler.cpp. 
--- tests/test-register-handler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 4ec3ccf1..b3a71f37 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -11,6 +11,8 @@ using log_surgeon::finite_automata::RegisterHandler; using position_t = log_surgeon::finite_automata::PrefixTree::position_t; constexpr position_t cInitialPos{0}; +constexpr position_t cNegativePos1{-1}; +constexpr position_t cNegativePos2{-100}; constexpr position_t cSetPos1{5}; constexpr position_t cSetPos2{10}; constexpr position_t cSetPos3{15}; @@ -72,4 +74,12 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { REQUIRE_THROWS_AS(handler.append_position(cInvalidRegId, cSetPos1), std::out_of_range); REQUIRE_THROWS_AS(handler.get_reversed_positions(cInvalidRegId), std::out_of_range); } + + SECTION("Handles negative position values correctly") { + handler.set_register(cRegId1, cNegativePos1); + handler.append_position(cRegId1, cSetPos1); + handler.append_position(cRegId1, cNegativePos2); + REQUIRE(std::vector{cNegativePos2, cSetPos1, cNegativePos1} + == handler.get_reversed_positions(cRegId1)); + } } From c8fb570a95a58e26d422009d86154c4b2f45d80a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 09:39:45 -0500 Subject: [PATCH 140/144] Alternate b/w positive and negative positions in test-prefix-tree negative position test as this is what is seen in practice when using negative positions. 
--- tests/test-prefix-tree.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 988961df..eb748a58 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -49,10 +49,13 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { // Test insertion with negative position values auto const node_id_5{tree.insert(0, cNegativePos1)}; - auto const node_id_6{tree.insert(node_id_5, cNegativePos2)}; + auto const node_id_6{tree.insert(node_id_5, cInsertPos1)}; + auto const node_id_7{tree.insert(node_id_6, cNegativePos2)}; REQUIRE(std::vector{cNegativePos1} == tree.get_reversed_positions(node_id_5)); - REQUIRE(std::vector{cNegativePos2, cNegativePos1} + REQUIRE(std::vector{cInsertPos1, cNegativePos1} == tree.get_reversed_positions(node_id_6)); + REQUIRE(std::vector{cNegativePos2, cInsertPos1, cNegativePos1} + == tree.get_reversed_positions(node_id_7)); } SECTION("Invalid index access throws correctly") { From 1f66918ea8aae2d4b1030d55cfc9ad986f4dd233 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 09:58:53 -0500 Subject: [PATCH 141/144] Add cRootId and size() to PrefixTree. 
--- .../finite_automata/PrefixTree.hpp | 4 +++ tests/test-prefix-tree.cpp | 28 +++++++++++-------- tests/test-register-handler.cpp | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index d6f74eef..7a76c4a4 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -22,6 +22,8 @@ class PrefixTree { using id_t = uint32_t; using position_t = int32_t; + static constexpr id_t cRootId{0}; + PrefixTree() : m_nodes{{std::nullopt, -1}} {} /** @@ -43,6 +45,8 @@ class PrefixTree { m_nodes.at(node_id).set_position(position); } + [[nodiscard]] auto size() const -> size_t { return m_nodes.size(); } + /** * @param node_id The index of the node. * @return A vector containing positions along the path defined by `node_id`, in reverse order, diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index eb748a58..47262bdd 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -10,6 +10,7 @@ using log_surgeon::finite_automata::PrefixTree; using id_t = PrefixTree::id_t; using position_t = PrefixTree::position_t; +constexpr auto cRootId{PrefixTree::cRootId}; constexpr id_t cInvaidNodeId{100}; constexpr position_t cInsertPos1{4}; constexpr position_t cInsertPos2{7}; @@ -21,20 +22,22 @@ constexpr position_t cSetPos1{10}; constexpr position_t cSetPos2{12}; constexpr position_t cSetPos3{15}; constexpr position_t cSetPos4{20}; +constexpr position_t cTreeSize1{4}; +constexpr position_t cTreeSize2{8}; TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { SECTION("Newly constructed tree works correctly") { PrefixTree const tree; // A newly constructed tree should return no positions as the root node is ignored - REQUIRE(tree.get_reversed_positions(0).empty()); + REQUIRE(tree.get_reversed_positions(cRootId).empty()); } SECTION("Inserting nodes into the prefix tree works correctly") { 
PrefixTree tree; // Test basic insertions - auto const node_id_1{tree.insert(0, cInsertPos1)}; + auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; auto const node_id_2{tree.insert(node_id_1, cInsertPos2)}; auto const node_id_3{tree.insert(node_id_2, cInsertPos3)}; REQUIRE(std::vector{cInsertPos1} == tree.get_reversed_positions(node_id_1)); @@ -42,13 +45,14 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { == tree.get_reversed_positions(node_id_2)); REQUIRE(std::vector{cInsertPos3, cInsertPos2, cInsertPos1} == tree.get_reversed_positions(node_id_3)); + REQUIRE(cTreeSize1 == tree.size()); // Test insertion with large position values - auto const node_id_4{tree.insert(0, cMaxPos)}; + auto const node_id_4{tree.insert(cRootId, cMaxPos)}; REQUIRE(cMaxPos == tree.get_reversed_positions(node_id_4)[0]); // Test insertion with negative position values - auto const node_id_5{tree.insert(0, cNegativePos1)}; + auto const node_id_5{tree.insert(cRootId, cNegativePos1)}; auto const node_id_6{tree.insert(node_id_5, cInsertPos1)}; auto const node_id_7{tree.insert(node_id_6, cNegativePos2)}; REQUIRE(std::vector{cNegativePos1} == tree.get_reversed_positions(node_id_5)); @@ -56,15 +60,15 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { == tree.get_reversed_positions(node_id_6)); REQUIRE(std::vector{cNegativePos2, cInsertPos1, cNegativePos1} == tree.get_reversed_positions(node_id_7)); + REQUIRE(cTreeSize2 == tree.size()); } SECTION("Invalid index access throws correctly") { PrefixTree tree; - REQUIRE_THROWS_AS(tree.get_reversed_positions(1), std::out_of_range); + REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range); - tree.insert(0, 4); - REQUIRE_THROWS_AS(tree.get_reversed_positions(2), std::out_of_range); - REQUIRE_THROWS_AS(tree.get_reversed_positions(3), std::out_of_range); + tree.insert(cRootId, cInsertPos1); + REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range); REQUIRE_THROWS_AS( 
tree.get_reversed_positions(std::numeric_limits::max()), @@ -75,10 +79,10 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { SECTION("Set position for a valid index works correctly") { PrefixTree tree; // Test that you can set the root node for sanity, although this value is not used - tree.set(0, cSetPos1); + tree.set(cRootId, cSetPos1); // Test updates to different nodes - auto const node_id_1{tree.insert(0, cInsertPos1)}; + auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; auto const node_id_2{tree.insert(node_id_1, cInsertPos1)}; tree.set(node_id_1, cSetPos1); tree.set(node_id_2, cSetPos2); @@ -93,7 +97,7 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { == tree.get_reversed_positions(node_id_2)); // Test that updates don't affect unrelated paths - auto const node_id_3{tree.insert(0, cSetPos2)}; + auto const node_id_3{tree.insert(cRootId, cSetPos2)}; tree.set(node_id_3, cSetPos3); REQUIRE(std::vector{cSetPos1} == tree.get_reversed_positions(node_id_1)); REQUIRE(std::vector{cSetPos4, cSetPos1} @@ -107,7 +111,7 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { REQUIRE_THROWS_AS(tree.set(cInvaidNodeId, cSetPos4), std::out_of_range); // Test setting position just beyond valid range - auto const node_id_1{tree.insert(0, cInsertPos1)}; + auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, cSetPos4), std::out_of_range); } } diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index b3a71f37..9cec3ff5 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -25,7 +25,7 @@ constexpr size_t cInvalidRegId{10}; namespace { auto add_register_to_handler(RegisterHandler& handler) -> void { for (size_t i{0}; i < cNumRegisters; ++i) { - handler.add_register(i, 0); + handler.add_register(i, cInitialPos); } } } // namespace From a388c809a60ed9a8072e6301fc2e17354a096972 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 10:06:41 -0500 
Subject: [PATCH 142/144] Update note. --- src/log_surgeon/finite_automata/RegisterHandler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp index 2c245907..86a06f21 100644 --- a/src/log_surgeon/finite_automata/RegisterHandler.hpp +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -12,8 +12,8 @@ namespace log_surgeon::finite_automata { * The register handler also contains a vector of registers, and performs the set, copy, and append * operations for these registers. * - * Note: for efficiency these registers may be re-used, but are not required to be re-initialized. - * It is the responsibility of the DFA to set the register value when needed. + * NOTE: For efficiency, registers are not initialized when lexing a new string; instead, it is the + * responsibility of the DFA to set the register values when needed. */ class RegisterHandler { public: From 340eaf7ae986bdd09fa692776c7b9eddd3780e27 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 10:10:01 -0500 Subject: [PATCH 143/144] Update docstring. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index 7a76c4a4..a111cee7 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -49,8 +49,8 @@ class PrefixTree { /** * @param node_id The index of the node. - * @return A vector containing positions along the path defined by `node_id`, in reverse order, - * i.e., [index, root). + * @return A vector containing positions in order from the given index up to but not including + * the root node. * @throw std::out_of_range if the index is out of range. 
*/ [[nodiscard]] auto get_reversed_positions(id_t node_id) const -> std::vector; From 22cf931a3333a32c6543458cc0be1ca1ff2bccf1 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 4 Dec 2024 10:12:10 -0500 Subject: [PATCH 144/144] Fix typo. --- src/log_surgeon/finite_automata/PrefixTree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index a111cee7..815c7dda 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -9,7 +9,7 @@ namespace log_surgeon::finite_automata { /** * Represents a prefix tree to store register data during TDFA simulation. Each node in the tree - * stores a single posiiton in the lexed string. Each path from the root to an index corresponds to + * stores a single position in the lexed string. Each path from the root to an index corresponds to * a sequence of positions for an individual tag: * - Positive position node: Indicates the tag was matched at the position. * - Negative position node: Indicates the tag was unmatched. If a negative node is the entire path,