Skip to content

Commit

Permalink
[19_2] Move cjk predicates to lolly::data
Browse files Browse the repository at this point in the history
  • Loading branch information
da-liii authored Dec 11, 2023
1 parent ab36467 commit 166e2ab
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 68 deletions.
38 changes: 0 additions & 38 deletions Kernel/Types/analyze.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,44 +86,6 @@ is_numeric (string s) {
return true;
}

bool
is_cjk_unified_ideographs (string s) {
int n= N (s);
for (int i= 0; i < n; i++)
if (s[i] == '<' && i + 1 < n && s[i + 1] == '#') {
int start= i + 2;
i = i + 2;
while (i < n && s[i] != '>')
i++;
string r= s (start, i);
if ("4E00" <= r && r <= "9FBF") continue;
else return false;
}
else {
return false;
}
return true;
}

bool
has_cjk_unified_ideographs (string s) {
int n= N (s);
for (int i= 0; i < n; i++)
if (s[i] == '<' && i + 1 < n && s[i + 1] == '#') {
int start= i + 2;
i = i + 2;
while (i < n && s[i] != '>')
i++;
string r= s (start, i);
if ("4E00" <= r && r <= "9FBF") return true;
else continue;
}
else {
continue;
}
return false;
}

/******************************************************************************
* Changing cases
******************************************************************************/
Expand Down
22 changes: 0 additions & 22 deletions Kernel/Types/analyze.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,28 +125,6 @@ bool is_iso_alpha (string s);
*/
bool is_numeric (string s);

/**
 * @brief Checks if a string contains only CJK Unified Ideographs.
 *
 * @param s The string to check.
 * @return True if all characters in the string are CJK Unified Ideographs,
 * otherwise false. An empty string yields true.
 * @note Each ideograph is expected to be written as an entity of the form
 * "<#XXXX>", where XXXX is its hexadecimal code point (for example
 * "<#4E2D>"); the accepted range is 4E00 to 9FBF.
 */
bool is_cjk_unified_ideographs (string s);

/**
 * @brief Checks if a string contains any CJK Unified Ideographs.
 *
 * @param s The string to check.
 * @return True if the string contains at least one CJK Unified Ideograph,
 * otherwise false.
 * @note Each ideograph is expected to be written as an entity of the form
 * "<#XXXX>", where XXXX is its hexadecimal code point (for example
 * "<#4E2D>"); the accepted range is 4E00 to 9FBF. Other content is skipped.
 */
bool has_cjk_unified_ideographs (string s);

/**
* @brief Converts a lowercase character to uppercase.
*
Expand Down
39 changes: 39 additions & 0 deletions lolly/data/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,44 @@ unicode_get_range (int code) {
else if (code >= 0x1d400 && code <= 0x1d7ff) return "mathletters";
else return "";
}

bool
is_cjk_unified_ideographs (string s) {
int n= N (s);
for (int i= 0; i < n; i++)
if (s[i] == '<' && i + 1 < n && s[i + 1] == '#') {
int start= i + 2;
i = i + 2;
while (i < n && s[i] != '>')
i++;
string r= s (start, i);
if ("4E00" <= r && r <= "9FBF") continue;
else return false;
}
else {
return false;
}
return true;
}

bool
has_cjk_unified_ideographs (string s) {
int n= N (s);
for (int i= 0; i < n; i++)
if (s[i] == '<' && i + 1 < n && s[i + 1] == '#') {
int start= i + 2;
i = i + 2;
while (i < n && s[i] != '>')
i++;
string r= s (start, i);
if ("4E00" <= r && r <= "9FBF") return true;
else continue;
}
else {
continue;
}
return false;
}

} // namespace data
} // namespace lolly
24 changes: 23 additions & 1 deletion lolly/data/unicode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,27 @@
namespace lolly {
namespace data {
/**
 * @brief Gives the name of the Unicode range a code point belongs to.
 *
 * @param code The Unicode code point to classify.
 * @return The name of the range (for example "ascii"), or an empty string
 * when the code point falls in none of the known ranges.
 */
string unicode_get_range (int code);

/**
 * @brief Checks if a string contains only CJK Unified Ideographs.
 *
 * @param s The string to check.
 * @return True if all characters in the string are CJK Unified Ideographs,
 * otherwise false. An empty string yields true.
 * @note Each ideograph is expected to be written as an entity of the form
 * "<#XXXX>", where XXXX is its hexadecimal code point (for example
 * "<#4E2D>"); the accepted range is 4E00 to 9FBF.
 */
bool is_cjk_unified_ideographs (string s);

/**
 * @brief Checks if a string contains any CJK Unified Ideographs.
 *
 * @param s The string to check.
 * @return True if the string contains at least one CJK Unified Ideograph,
 * otherwise false.
 * @note Each ideograph is expected to be written as an entity of the form
 * "<#XXXX>", where XXXX is its hexadecimal code point (for example
 * "<#4E2D>"); the accepted range is 4E00 to 9FBF. Other content is skipped.
 */
bool has_cjk_unified_ideographs (string s);
} // namespace data
} // namespace lolly
7 changes: 0 additions & 7 deletions tests/Kernel/Types/analyze_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,6 @@ TEST_CASE ("as_hex") {
}
}

// Exercises both CJK predicates on a lone "<#4E2D>" entity and on the same
// entity preceded by the plain prefix "bib-".
TEST_CASE ("cjk_unified_ideographs") {
// A single in-range entity satisfies both predicates.
CHECK (is_cjk_unified_ideographs ("<#4E2D>"));
CHECK (has_cjk_unified_ideographs ("<#4E2D>"));
// With a non-entity prefix the string still "has" an ideograph, but it no
// longer consists only of ideographs.
CHECK (has_cjk_unified_ideographs ("bib-<#4E2D>"));
CHECK (!is_cjk_unified_ideographs ("bib-<#4E2D>"));
}

TEST_CASE ("test locase all") {
CHECK_EQ (locase_all (string ("true")) == string ("true"), true);
CHECK_EQ (locase_all (string ("TRue")) == string ("true"), true);
Expand Down
9 changes: 9 additions & 0 deletions tests/lolly/data/unicode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,19 @@
#include "a_tbox_main.cpp"
#include "lolly/data/unicode.hpp"

using lolly::data::has_cjk_unified_ideographs;
using lolly::data::is_cjk_unified_ideographs;
using lolly::data::unicode_get_range;

// Checks the range name reported by unicode_get_range for a few code points.
TEST_CASE ("unicode_get_range") {
// The letter 'a' is reported as "ascii".
string_eq (unicode_get_range ((int) 'a'), "ascii");
// 0x2460 and 0x24ff must both map to "enclosed_alphanumerics"
// (presumably the first and last code points of that range -- confirm
// against unicode_get_range).
string_eq (unicode_get_range (0x2460), "enclosed_alphanumerics"); //
string_eq (unicode_get_range (0x24ff), "enclosed_alphanumerics"); //
}

// Exercises both CJK predicates on a lone "<#4E2D>" entity and on the same
// entity preceded by the plain prefix "bib-".
TEST_CASE ("cjk_unified_ideographs") {
// A single in-range entity satisfies both predicates.
CHECK (is_cjk_unified_ideographs ("<#4E2D>"));
CHECK (has_cjk_unified_ideographs ("<#4E2D>"));
// With a non-entity prefix the string still "has" an ideograph, but it no
// longer consists only of ideographs.
CHECK (has_cjk_unified_ideographs ("bib-<#4E2D>"));
CHECK (!is_cjk_unified_ideographs ("bib-<#4E2D>"));
}

0 comments on commit 166e2ab

Please sign in to comment.