Skip to content

Commit

Permalink
First complete version of regex to wildcard utils
Browse files Browse the repository at this point in the history
  • Loading branch information
Bill-hbrhbr committed Jul 13, 2024
1 parent 6d069cf commit b24e3a1
Show file tree
Hide file tree
Showing 9 changed files with 1,088 additions and 235 deletions.
12 changes: 11 additions & 1 deletion components/core/src/clp/regex_utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
# Headers that belong to the regex_utils library; they are passed to add_library() below
# alongside the source files.
set(
REGEX_UTILS_HEADER_LIST
"ErrorCode.hpp"
"RegexToWildcardTranslatorConfig.hpp"
"constants.hpp"
"regex_utils.hpp"
)
# Library target built from the regex utility sources and the header list above.
add_library(
regex_utils
regex_utils.cpp
regex_utils_anchors.cpp
ErrorCode.cpp
${REGEX_UTILS_HEADER_LIST}
)
# Namespaced alias so that consumers can refer to the target as `clp::regex_utils`.
add_library(clp::regex_utils ALIAS regex_utils)
# NOTE(review): the parent directory is added as a PUBLIC include directory both here and in the
# call below, which looks redundant -- confirm whether this single-line call should be removed.
target_include_directories(regex_utils PUBLIC ../)
# The parent directory is exposed to consumers (PUBLIC); the project's `submodules` directory is
# only added for compiling this target itself (PRIVATE).
target_include_directories(regex_utils
PUBLIC
../
PRIVATE
"${PROJECT_SOURCE_DIR}/submodules"
)
# Require C++20 when compiling this target.
target_compile_features(regex_utils PRIVATE cxx_std_20)
93 changes: 93 additions & 0 deletions components/core/src/clp/regex_utils/ErrorCode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#include "regex_utils/ErrorCode.hpp"

#include <string>
#include <string_view>
#include <system_error>

using std::error_category;
using std::error_code;
using std::string;
using std::string_view;

namespace clp::regex_utils {

/**
 * Error category that supplies a category name and a descriptive message for each `ErrorCode`
 * value. It is an implementation detail of the std::error_code wrapper and does not need to be
 * visible outside this translation unit.
 */
class ErrorCodeCategory : public error_category {
public:
    /**
     * @return The name of this error category.
     */
    [[nodiscard]] auto name() const noexcept -> char const* override;

    /**
     * @param ev The error code, encoded as an int.
     * @return The descriptive message for the error.
     */
    [[nodiscard]] auto message(int ev) const -> string override;
};

/**
 * @return The fixed name identifying the regex utility error category.
 */
auto ErrorCodeCategory::name() const noexcept -> char const* {
    constexpr char const* cCategoryName{"regex utility"};
    return cCategoryName;
}

/**
 * Maps an error value to its human-readable description.
 *
 * @param ev The error code, encoded as an int; it is interpreted as an `ErrorCode`.
 * @return The descriptive message for the error, or "(unrecognized error)" if `ev` does not
 * correspond to a handled `ErrorCode` value.
 */
auto ErrorCodeCategory::message(int ev) const -> string {
    switch (static_cast<ErrorCode>(ev)) {
        case ErrorCode::Success:
            return "Success.";

        case ErrorCode::IllegalState:
            return "Unrecognized state.";

        case ErrorCode::Star:
            return "Failed to translate due to metachar `*` (zero or more occurrences).";

        case ErrorCode::Plus:
            return "Failed to translate due to metachar `+` (one or more occurrences).";

        case ErrorCode::Question:
            return "Currently does not support returning a list of wildcard translations. The "
                   "metachar `?` (lazy match) may be supported in the future.";

        case ErrorCode::Pipe:
            return "Currently does not support returning a list of wildcard translations. The "
                   "regex OR condition feature may be supported in the future.";

        case ErrorCode::Caret:
            return "Failed to translate due to start anchor `^` in the middle of the string.";

        case ErrorCode::Dollar:
            return "Failed to translate due to end anchor `$` in the middle of the string.";

        case ErrorCode::DisallowedEscapeSequence:
            return "Disallowed escape sequence.";

        case ErrorCode::UnmatchedParenthesis:
            return "Unmatched opening `(` or closing `)`.";

        case ErrorCode::UnsupportedCharsets:
            return "Currently only supports case-insensitive single-char charset (i.e. [aA] [bB]).";

        case ErrorCode::IncompleteCharsetStructure:
            return "Unmatched closing `]` at the end of the string.";

        case ErrorCode::UnsupportedQuantifier:
            return "Currently only supports exact positive number of repetitions in regex syntax.";

        case ErrorCode::TokenUnquantifiable:
            return "The preceding token is not quantifiable.";

        // Any int that is not one of the enumerators above lands here.
        default:
            return "(unrecognized error)";
    }
}

// The single category instance that every error code created by `make_error_code` refers to.
ErrorCodeCategory const cTheErrorCodeCategory{};

/**
 * Builds an std::error_code from an `ErrorCode` enumerator.
 *
 * @param e The error code enum value.
 * @return An std::error_code holding the enum's integer value and the regex utility category.
 */
auto make_error_code(ErrorCode e) -> error_code {
    auto const error_value = static_cast<int>(e);
    return error_code{error_value, cTheErrorCodeCategory};
}

} // namespace clp::regex_utils
46 changes: 46 additions & 0 deletions components/core/src/clp/regex_utils/ErrorCode.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#ifndef CLP_REGEX_UTILS_ERRORCODE_HPP
#define CLP_REGEX_UTILS_ERRORCODE_HPP

#include <cstdint>
#include <system_error>
#include <type_traits>

namespace clp::regex_utils {

/**
 * Enum class for propagating and handling various regex utility errors.
 * More detailed descriptions can be found in ErrorCode.cpp.
 *
 * The underlying type is spelled `std::uint8_t` because `<cstdint>` only guarantees the name in
 * namespace `std`.
 */
enum class ErrorCode : std::uint8_t {
    // No error.
    Success = 0,
    // Unrecognized state.
    IllegalState,
    // Translation failed due to the `*` (zero or more occurrences) metachar.
    Star,
    // Translation failed due to the `+` (one or more occurrences) metachar.
    Plus,
    // The `?` metachar would require returning a list of wildcard translations (unsupported).
    Question,
    // The regex OR condition would require returning a list of wildcard translations
    // (unsupported).
    Pipe,
    // Start anchor `^` in the middle of the string.
    Caret,
    // End anchor `$` in the middle of the string.
    Dollar,
    // Disallowed escape sequence.
    DisallowedEscapeSequence,
    // Unmatched opening `(` or closing `)`.
    UnmatchedParenthesis,
    // Charset other than a case-insensitive single-char charset (e.g. [aA]).
    UnsupportedCharsets,
    // Charset whose closing `]` is unmatched at the end of the string.
    IncompleteCharsetStructure,
    // Quantifier other than an exact positive number of repetitions.
    UnsupportedQuantifier,
    // The token preceding a quantifier is not quantifiable.
    TokenUnquantifiable,
};

/**
 * Wrapper function to turn an `ErrorCode` enum value into an std::error_code.
 *
 * @param ec The error code enum value to wrap.
 * @return The corresponding std::error_code.
 */
[[nodiscard]] auto make_error_code(ErrorCode ec) -> std::error_code;

} // namespace clp::regex_utils

namespace std {
// Registers `clp::regex_utils::ErrorCode` as an error code enum for the standard library's
// `is_error_code_enum` trait.
template <>
struct is_error_code_enum<clp::regex_utils::ErrorCode> : public std::true_type {};
}  // namespace std

#endif // CLP_REGEX_UTILS_ERRORCODE_HPP
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP
#define CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP

namespace clp::regex_utils {

/**
 * Holds the boolean options of the regex to wildcard translator. A default-constructed config
 * has `case_insensitive_wildcard` and `add_prefix_suffix_wildcards` disabled and `allow_anchors`
 * enabled.
 */
class RegexToWildcardTranslatorConfig {
public:
    // Constructors
    RegexToWildcardTranslatorConfig() = default;

    // Setters
    auto set_case_insensitive_wildcard(bool case_insensitive_wildcard) -> void {
        m_case_insensitive_wildcard = case_insensitive_wildcard;
    }

    auto set_allow_anchors(bool allow_anchors) -> void { m_allow_anchors = allow_anchors; }

    auto set_add_prefix_suffix_wildcards(bool add_prefix_suffix_wildcards) -> void {
        m_add_prefix_suffix_wildcards = add_prefix_suffix_wildcards;
    }

    // Getters
    [[nodiscard]] auto case_insensitive_wildcard() const -> bool {
        return m_case_insensitive_wildcard;
    }

    [[nodiscard]] auto allow_anchors() const -> bool { return m_allow_anchors; }

    [[nodiscard]] auto add_prefix_suffix_wildcards() const -> bool {
        return m_add_prefix_suffix_wildcards;
    }

private:
    // Variables
    bool m_case_insensitive_wildcard{false};
    bool m_allow_anchors{true};
    bool m_add_prefix_suffix_wildcards{false};
};

} // namespace clp::regex_utils

#endif // CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP
48 changes: 48 additions & 0 deletions components/core/src/clp/regex_utils/constants.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef CLP_REGEX_UTILS_CONSTANTS_HPP
#define CLP_REGEX_UTILS_CONSTANTS_HPP

#include <array>
#include <cstddef>
#include <string_view>

namespace clp::regex_utils {

// Number of entries in a character lookup table: one per ASCII code point.
constexpr std::size_t cCharBitarraySize = 128;

/**
 * Create an ASCII character lookup table (bit array) at compile time.
 *
 * @param char_str A string that contains the characters to look up.
 * @return The lookup table as a bit array, where entry `i` is true iff the character with code
 * `i` appears in `char_str`.
 * @throw std::out_of_range (or a compile error when evaluated in a constant expression) if
 * `char_str` contains a character whose code is not smaller than `cCharBitarraySize`.
 */
[[nodiscard]] constexpr auto create_char_bit_array(std::string_view char_str
) -> std::array<bool, cCharBitarraySize> {
    // Value-initialization already sets every entry to false, so no explicit fill is needed.
    std::array<bool, cCharBitarraySize> bit_array{};
    for (char const ch : char_str) {
        // Convert through `unsigned char` so the index does not depend on the signedness of
        // `char`; `at` still rejects any non-ASCII code.
        bit_array.at(static_cast<unsigned char>(ch)) = true;
    }
    return bit_array;
}

// Wildcard syntax characters
constexpr char cZeroOrMoreCharsWildcard{'*'};
constexpr char cSingleCharWildcard{'?'};
// Regex quantifier characters
constexpr char cRegexZeroOrMore{'*'};
constexpr char cRegexOneOrMore{'+'};
// The regex zero-or-one quantifier is `?`; it must be distinct from the one-or-more quantifier.
constexpr char cRegexZeroOrOne{'?'};
// Regex anchor characters
constexpr char cRegexStartAnchor{'^'};
constexpr char cRegexEndAnchor{'$'};
// Character that starts an escape sequence
constexpr char cEscapeChar{'\\'};
// Character that negates a character set
constexpr char cCharsetNegate{'^'};

// Lookup table of the meta characters accepted in a regex escape sequence. The set is
// deliberately more complete than necessary: users may not know exactly which meta characters
// require escaping and may therefore write unnecessary escape sequences.
constexpr auto cRegexEscapeSeqAcceptedMetaChars{create_char_bit_array("^$.*{}[]()+|?<>-_/=!\\")};
// Lookup table of the meta characters that need escaping in the wildcard syntax.
constexpr auto cRegexEscapeSeqWildcardOnlyMetaChars{create_char_bit_array("?*\\")};
// Lookup table of the meta characters that need escaping inside a character set.
constexpr auto cRegexCharsetEscapeSeqMetaChars{create_char_bit_array("^-]\\")};

} // namespace clp::regex_utils

#endif // CLP_REGEX_UTILS_CONSTANTS_HPP
Loading

0 comments on commit b24e3a1

Please sign in to comment.