-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First complete version of regex to wildcard utils
- Loading branch information
1 parent
6d069cf
commit b24e3a1
Showing
9 changed files
with
1,088 additions
and
235 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,22 @@ | ||
# Headers that belong to the regex_utils library; the list is passed to add_library() below. | ||
set( | ||
REGEX_UTILS_HEADER_LIST | ||
"ErrorCode.hpp" | ||
"RegexToWildcardTranslatorConfig.hpp" | ||
"constants.hpp" | ||
"regex_utils.hpp" | ||
) | ||
# The library target: its translation units plus the header list above. | ||
add_library( | ||
regex_utils | ||
regex_utils.cpp | ||
regex_utils_anchors.cpp | ||
ErrorCode.cpp | ||
${REGEX_UTILS_HEADER_LIST} | ||
) | ||
# Namespaced alias so the target can also be referred to as clp::regex_utils. | ||
add_library(clp::regex_utils ALIAS regex_utils) | ||
# NOTE(review): this one-line call repeats the `PUBLIC ../` entry of the multi-line call that | ||
# follows; it looks like the pre-change line of the diff rendered next to its replacement - confirm. | ||
target_include_directories(regex_utils PUBLIC ../) | ||
# `../` is a PUBLIC include directory; the project's `submodules` directory is PRIVATE. | ||
target_include_directories(regex_utils | ||
PUBLIC | ||
../ | ||
PRIVATE | ||
"${PROJECT_SOURCE_DIR}/submodules" | ||
) | ||
# Require C++20 when compiling this target (PRIVATE: not propagated to targets linking it). | ||
target_compile_features(regex_utils PRIVATE cxx_std_20) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#include "regex_utils/ErrorCode.hpp" | ||
|
||
#include <string> | ||
#include <string_view> | ||
#include <system_error> | ||
|
||
using std::error_category; | ||
using std::error_code; | ||
using std::string; | ||
using std::string_view; | ||
|
||
namespace clp::regex_utils { | ||
|
||
/**
 * Error category that gives the regex utility error codes detailed string descriptions.
 * This class does not need to be seen outside the std error code wrapper implementation.
 */
class ErrorCodeCategory final : public error_category {
public:
    /**
     * @return The name of this class of errors.
     */
    [[nodiscard]] char const* name() const noexcept override;

    /**
     * @param ev The error code encoded as an int.
     * @return The descriptive message for the error.
     */
    [[nodiscard]] string message(int ev) const override;
};
|
||
// Reports the fixed name that identifies this error category.
auto ErrorCodeCategory::name() const noexcept -> char const* {
    constexpr char const* category_name = "regex utility";
    return category_name;
}
|
||
auto ErrorCodeCategory::message(int ev) const -> string { | ||
switch (static_cast<ErrorCode>(ev)) { | ||
case ErrorCode::Success: | ||
return "Success."; | ||
|
||
case ErrorCode::IllegalState: | ||
return "Unrecognized state."; | ||
|
||
case ErrorCode::Star: | ||
return "Failed to translate due to metachar `*` (zero or more occurences)."; | ||
|
||
case ErrorCode::Plus: | ||
return "Failed to translate due to metachar `+` (one or more occurences)."; | ||
|
||
case ErrorCode::Question: | ||
return "Currently does not support returning a list of wildcard translations. The " | ||
"metachar `?` (lazy match) may be supported in the future."; | ||
|
||
case ErrorCode::Pipe: | ||
return "Currently does not support returning a list of wildcard translations. The " | ||
"regex OR condition feature may be supported in the future."; | ||
|
||
case ErrorCode::Caret: | ||
return "Failed to translate due to start anchor `^` in the middle of the string."; | ||
|
||
case ErrorCode::Dollar: | ||
return "Failed to translate due to end anchor `$` in the middle of the string."; | ||
|
||
case ErrorCode::DisallowedEscapeSequence: | ||
return "Disallowed escape sequence."; | ||
|
||
case ErrorCode::UnmatchedParenthesis: | ||
return "Unmatched opening `(` or closing `)`."; | ||
|
||
case ErrorCode::UnsupportedCharsets: | ||
return "Currently only supports case-insensitive single-char charset (i.e. [aA] [bB])."; | ||
|
||
case ErrorCode::IncompleteCharsetStructure: | ||
return "Unmatched closing `]` at the end of the string."; | ||
|
||
case ErrorCode::UnsupportedQuantifier: | ||
return "Currently only supports exact positive number of repetitions in regex syntax."; | ||
|
||
case ErrorCode::TokenUnquantifiable: | ||
return "The preceding token is not quantifiable."; | ||
|
||
default: | ||
return "(unrecognized error)"; | ||
} | ||
} | ||
|
||
ErrorCodeCategory const cTheErrorCodeCategory{}; | ||
|
||
auto make_error_code(ErrorCode e) -> error_code { | ||
return {static_cast<int>(e), cTheErrorCodeCategory}; | ||
} | ||
|
||
} // namespace clp::regex_utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#ifndef CLP_REGEX_UTILS_ERRORCODE_HPP | ||
#define CLP_REGEX_UTILS_ERRORCODE_HPP | ||
|
||
#include <cstdint> | ||
#include <system_error> | ||
#include <type_traits> | ||
|
||
namespace clp::regex_utils { | ||
|
||
/**
 * Enum class for propagating and handling various regex utility errors.
 * More detailed descriptions can be found in ErrorCode.cpp.
 */
enum class ErrorCode : std::uint8_t {
    Success = 0,
    IllegalState,  // Unrecognized state
    Star,  // Untranslatable metachar `*`
    Plus,  // Untranslatable metachar `+`
    Question,  // Metachar `?` is not supported yet
    Pipe,  // The regex OR condition is not supported yet
    Caret,  // Start anchor `^` in the middle of the string
    Dollar,  // End anchor `$` in the middle of the string
    DisallowedEscapeSequence,
    UnmatchedParenthesis,  // Unmatched opening `(` or closing `)`
    UnsupportedCharsets,  // Only case-insensitive single-char charsets are supported
    IncompleteCharsetStructure,  // Unmatched closing `]` at the end of the string
    UnsupportedQuantifier,  // Only an exact positive number of repetitions is supported
    TokenUnquantifiable,  // The preceding token is not quantifiable
};
|
||
/** | ||
* Wrapper function to turn a regular enum class into an std::error_code. | ||
* | ||
* @param An error code enum. | ||
* @return The corresponding std::error_code type variable. | ||
*/ | ||
[[nodiscard]] auto make_error_code(ErrorCode ec) -> std::error_code; | ||
|
||
} // namespace clp::regex_utils | ||
|
||
namespace std { | ||
template <> | ||
struct is_error_code_enum<clp::regex_utils::ErrorCode> : true_type {}; | ||
} // namespace std | ||
|
||
#endif // CLP_REGEX_UTILS_ERRORCODE_HPP |
42 changes: 42 additions & 0 deletions
42
components/core/src/clp/regex_utils/RegexToWildcardTranslatorConfig.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#ifndef CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP | ||
#define CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP | ||
|
||
namespace clp::regex_utils { | ||
|
||
/**
 * Holds the three boolean options of the regex to wildcard translator. A default-constructed
 * config allows anchors and leaves the other two options off.
 */
class RegexToWildcardTranslatorConfig {
public:
    // Constructors
    RegexToWildcardTranslatorConfig() = default;

    // Getters
    [[nodiscard]] auto case_insensitive_wildcard() const -> bool {
        bool const enabled{m_case_insensitive_wildcard};
        return enabled;
    }

    [[nodiscard]] auto allow_anchors() const -> bool {
        bool const enabled{m_allow_anchors};
        return enabled;
    }

    [[nodiscard]] auto add_prefix_suffix_wildcards() const -> bool {
        bool const enabled{m_add_prefix_suffix_wildcards};
        return enabled;
    }

    // Setters
    auto set_case_insensitive_wildcard(bool case_insensitive_wildcard) -> void {
        this->m_case_insensitive_wildcard = case_insensitive_wildcard;
    }

    auto set_allow_anchors(bool allow_anchors) -> void {
        this->m_allow_anchors = allow_anchors;
    }

    auto set_add_prefix_suffix_wildcards(bool add_prefix_suffix_wildcards) -> void {
        this->m_add_prefix_suffix_wildcards = add_prefix_suffix_wildcards;
    }

private:
    // Variables
    bool m_case_insensitive_wildcard{false};
    bool m_allow_anchors{true};
    bool m_add_prefix_suffix_wildcards{false};
};
|
||
} // namespace clp::regex_utils | ||
|
||
#endif // CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#ifndef CLP_REGEX_UTILS_CONSTANTS_HPP | ||
#define CLP_REGEX_UTILS_CONSTANTS_HPP | ||
|
||
#include <array> | ||
#include <cstddef> | ||
#include <string_view> | ||
|
||
namespace clp::regex_utils { | ||
|
||
// Number of entries in a character lookup table: one per ASCII code point (0-127).
constexpr size_t cCharBitarraySize = 128;

/**
 * Create an ASCII character lookup table (bit array) at compile time.
 *
 * @param char_str A string that contains the characters to look up.
 * @return The lookup table as bit array: the entry indexed by a character's code is true iff that
 * character appears in `char_str`.
 * @throw std::out_of_range if `char_str` contains a character whose code is not below
 * `cCharBitarraySize` (during constant evaluation this becomes a compile error).
 */
[[nodiscard]] constexpr auto create_char_bit_array(std::string_view char_str
) -> std::array<bool, cCharBitarraySize> {
    // Value-initialization already sets every entry to false.
    std::array<bool, cCharBitarraySize> bit_array{};
    for (char const ch : char_str) {
        // Index through `unsigned char` so the index never relies on an implicit sign conversion
        // of `char`; `at` still rejects anything outside the table.
        bit_array.at(static_cast<unsigned char>(ch)) = true;
    }
    return bit_array;
}
|
||
// Wildcard syntax metacharacters.
constexpr char cZeroOrMoreCharsWildcard{'*'};
constexpr char cSingleCharWildcard{'?'};
// Regex quantifier metacharacters.
constexpr char cRegexZeroOrMore{'*'};
constexpr char cRegexOneOrMore{'+'};
// `?` is the zero-or-one quantifier; `+` is already taken by cRegexOneOrMore.
constexpr char cRegexZeroOrOne{'?'};
// Regex anchors, the escape character, and the charset negation marker.
constexpr char cRegexStartAnchor{'^'};
constexpr char cRegexEndAnchor{'$'};
constexpr char cEscapeChar{'\\'};
constexpr char cCharsetNegate{'^'};
|
||
// Characters accepted after the escape character in a regex. The set is deliberately wider than
// necessary: users may not know exactly which meta characters need escaping and may write
// unnecessary escape sequences.
constexpr auto cRegexEscapeSeqAcceptedMetaChars{create_char_bit_array("^$.*{}[]()+|?<>-_/=!\\")};

// Meta characters that need escaping in the wildcard syntax.
constexpr auto cRegexEscapeSeqWildcardOnlyMetaChars{create_char_bit_array("?*\\")};

// Meta characters that need escaping in the character set.
constexpr auto cRegexCharsetEscapeSeqMetaChars{create_char_bit_array("^-]\\")};
|
||
} // namespace clp::regex_utils | ||
|
||
#endif // CLP_REGEX_UTILS_CONSTANTS_HPP |
Oops, something went wrong.