From 6bf8c014841efbaa48e8f9f3cf73491571ba13ed Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 7 Jan 2025 07:50:44 -0500 Subject: [PATCH 1/2] Fix test-NFA to test-nfa and ByteNFA to ByteNfa. --- tests/CMakeLists.txt | 2 +- tests/{test-NFA.cpp => test-nfa.cpp} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename tests/{test-NFA.cpp => test-nfa.cpp} (98%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0551615..b0adc6c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -24,7 +24,7 @@ set( ../src/log_surgeon/Token.hpp ) -set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-prefix-tree.cpp test-register-handler.cpp test-tag.cpp) +set(SOURCES_TESTS test-lexer.cpp test-nfa.cpp test-prefix-tree.cpp test-register-handler.cpp test-tag.cpp) add_executable(unit-test ${SOURCES_LOG_SURGEON} ${SOURCES_TESTS}) target_link_libraries(unit-test PRIVATE Catch2::Catch2WithMain log_surgeon::log_surgeon) diff --git a/tests/test-NFA.cpp b/tests/test-nfa.cpp similarity index 98% rename from tests/test-NFA.cpp rename to tests/test-nfa.cpp index 160d421..719a168 100644 --- a/tests/test-NFA.cpp +++ b/tests/test-nfa.cpp @@ -21,7 +21,7 @@ using std::stringstream; using std::vector; using ByteLexicalRule = log_surgeon::LexicalRule; -using ByteNFA = log_surgeon::finite_automata::Nfa; +using ByteNfa = log_surgeon::finite_automata::Nfa; using RegexASTCatByte = log_surgeon::finite_automata::RegexASTCat; using RegexASTCaptureByte = log_surgeon::finite_automata::RegexASTCapture; using RegexASTGroupByte = log_surgeon::finite_automata::RegexASTGroup; @@ -44,7 +44,7 @@ TEST_CASE("Test NFA", "[NFA]") { auto& capture_rule_ast = dynamic_cast(*schema_ast->m_schema_vars[0]); vector rules; rules.emplace_back(0, std::move(capture_rule_ast.m_regex_ptr)); - ByteNFA const nfa{std::move(rules)}; + ByteNfa const nfa{std::move(rules)}; // Compare against expected output string expected_serialized_nfa = "0:byte_transitions={A-->1,Z-->2}," From 5dc5953848e157b7b298631ecc23826ee43b450c Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 7 Jan 2025 08:05:41 -0500 Subject: [PATCH 2/2] Combine NfaStateType and DfaStateType into StateType. --- CMakeLists.txt | 3 +-- src/log_surgeon/finite_automata/DfaState.hpp | 18 ++++++++--------- .../finite_automata/DfaStateType.hpp | 13 ------------ src/log_surgeon/finite_automata/NfaState.hpp | 20 +++++++++---------- .../finite_automata/NfaStateType.hpp | 13 ------------ src/log_surgeon/finite_automata/StateType.hpp | 13 ++++++++++++ tests/CMakeLists.txt | 2 +- 7 files changed, 34 insertions(+), 48 deletions(-) delete mode 100644 src/log_surgeon/finite_automata/DfaStateType.hpp delete mode 100644 src/log_surgeon/finite_automata/NfaStateType.hpp create mode 100644 src/log_surgeon/finite_automata/StateType.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ceb932d..9ad59f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,11 +99,10 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/Dfa.hpp src/log_surgeon/finite_automata/DfaState.hpp src/log_surgeon/finite_automata/DfaStatePair.hpp - src/log_surgeon/finite_automata/DfaStateType.hpp src/log_surgeon/finite_automata/Nfa.hpp src/log_surgeon/finite_automata/NfaState.hpp - src/log_surgeon/finite_automata/NfaStateType.hpp src/log_surgeon/finite_automata/RegisterHandler.hpp + src/log_surgeon/finite_automata/StateType.hpp src/log_surgeon/finite_automata/Tag.hpp src/log_surgeon/finite_automata/TaggedTransition.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp diff --git a/src/log_surgeon/finite_automata/DfaState.hpp b/src/log_surgeon/finite_automata/DfaState.hpp index f25b25a..76a45f2 100644 --- a/src/log_surgeon/finite_automata/DfaState.hpp +++ b/src/log_surgeon/finite_automata/DfaState.hpp @@ -9,17 +9,17 @@ #include #include -#include +#include #include namespace log_surgeon::finite_automata { -template +template class DfaState; -using ByteDfaState = DfaState; -using Utf8DfaState = DfaState; +using ByteDfaState = DfaState; +using Utf8DfaState = DfaState; -template +template class DfaState { public: using Tree = UnicodeIntervalTree; @@ -51,14 +51,14 @@ class DfaState { private: std::vector m_matching_variable_ids; DfaState* m_bytes_transition[cSizeOfByte]; - // NOTE: We don't need m_tree_transitions for the `state_type == DfaStateType::Byte` case, so we + // NOTE: We don't need m_tree_transitions for the `state_type == StateType::Byte` case, so we // use an empty class (`std::tuple<>`) in that case. - std::conditional_t> m_tree_transitions; + std::conditional_t> m_tree_transitions; }; -template +template auto DfaState::next(uint32_t character) const -> DfaState* { - if constexpr (DfaStateType::Byte == state_type) { + if constexpr (StateType::Byte == state_type) { return m_bytes_transition[character]; } else { if (character < cSizeOfByte) { diff --git a/src/log_surgeon/finite_automata/DfaStateType.hpp b/src/log_surgeon/finite_automata/DfaStateType.hpp deleted file mode 100644 index 017134c..0000000 --- a/src/log_surgeon/finite_automata/DfaStateType.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE_TYPE -#define LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE_TYPE - -#include - -namespace log_surgeon::finite_automata { -enum class DfaStateType : uint8_t { - Byte, - Utf8 -}; -} // namespace log_surgeon::finite_automata - -#endif // LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE_TYPE diff --git a/src/log_surgeon/finite_automata/NfaState.hpp b/src/log_surgeon/finite_automata/NfaState.hpp index 3dc4285..09676c9 100644 --- a/src/log_surgeon/finite_automata/NfaState.hpp +++ b/src/log_surgeon/finite_automata/NfaState.hpp @@ -12,18 +12,18 @@ #include -#include +#include #include #include namespace log_surgeon::finite_automata { -template +template class NfaState; -using ByteNfaState = NfaState; -using Utf8NfaState = NfaState; +using ByteNfaState = NfaState; +using Utf8NfaState = NfaState; -template +template class NfaState { public: using Tree = UnicodeIntervalTree; @@ -113,12 +113,12 @@ class NfaState { std::vector m_epsilon_transitions; std::array, cSizeOfByte> m_bytes_transitions; // NOTE: We don't need m_tree_transitions for the `stateType == - // NfaStateType::Byte` case, so we use an empty class (`std::tuple<>`) + // StateType::Byte` case, so we use an empty class (`std::tuple<>`) // in that case. - std::conditional_t> m_tree_transitions; + std::conditional_t> m_tree_transitions; }; -template +template auto NfaState::add_interval(Interval interval, NfaState* dest_state) -> void { if (interval.first < cSizeOfByte) { uint32_t const bound = std::min(interval.second, cSizeOfByte - 1); @@ -127,7 +127,7 @@ auto NfaState::add_interval(Interval interval, NfaState* dest_state) } interval.first = bound + 1; } - if constexpr (NfaStateType::Utf8 == state_type) { + if constexpr (StateType::Utf8 == state_type) { if (interval.second < cSizeOfByte) { return; } @@ -165,7 +165,7 @@ auto NfaState::add_interval(Interval interval, NfaState* dest_state) } } -template +template auto NfaState::serialize(std::unordered_map const& state_ids ) const -> std::optional { std::vector byte_transitions; diff --git a/src/log_surgeon/finite_automata/NfaStateType.hpp b/src/log_surgeon/finite_automata/NfaStateType.hpp deleted file mode 100644 index 1cc56de..0000000 --- a/src/log_surgeon/finite_automata/NfaStateType.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef LOG_SURGEON_FINITE_AUTOMATA_NFA_STATE_TYPE -#define LOG_SURGEON_FINITE_AUTOMATA_NFA_STATE_TYPE - -#include - -namespace log_surgeon::finite_automata { -enum class NfaStateType : uint8_t { - Byte, - Utf8 -}; -} // namespace log_surgeon::finite_automata - -#endif // LOG_SURGEON_FINITE_AUTOMATA_NFA_STATE_TYPE diff --git a/src/log_surgeon/finite_automata/StateType.hpp b/src/log_surgeon/finite_automata/StateType.hpp new file mode 100644 index 0000000..230bfa4 --- /dev/null +++ b/src/log_surgeon/finite_automata/StateType.hpp @@ -0,0 +1,13 @@ +#ifndef LOG_SURGEON_FINITE_AUTOMATA_STATE_TYPE +#define LOG_SURGEON_FINITE_AUTOMATA_STATE_TYPE + +#include + +namespace log_surgeon::finite_automata { +enum class StateType : uint8_t { + Byte, + Utf8 +}; +} // namespace log_surgeon::finite_automata + +#endif // LOG_SURGEON_FINITE_AUTOMATA_STATE_TYPE diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b0adc6c..652eceb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,8 +7,8 @@ set( ../src/log_surgeon/finite_automata/RegexAST.hpp ../src/log_surgeon/finite_automata/Nfa.hpp ../src/log_surgeon/finite_automata/NfaState.hpp - ../src/log_surgeon/finite_automata/NfaStateType.hpp ../src/log_surgeon/finite_automata/RegisterHandler.hpp + ../src/log_surgeon/finite_automata/StateType.hpp ../src/log_surgeon/finite_automata/Tag.hpp ../src/log_surgeon/finite_automata/TaggedTransition.hpp ../src/log_surgeon/Lalr1Parser.cpp