diff --git a/config.m4 b/config.m4
index 4933221..e1c2d0c 100644
--- a/config.m4
+++ b/config.m4
@@ -161,6 +161,7 @@ if test "$PHP_ECMA_INTL" != "no"; then
         tests/criterion/ecma402/locale_min_max_test.c \
         tests/criterion/ecma402/locale_options_test.c \
         tests/criterion/ecma402/locale_test.c \
+        tests/criterion/ecma402/locale_validation_test.cpp \
         tests/criterion/ecma402/numbering_system_test.c \
         tests/criterion/ecma402/time_zone_test.c \
         tests/criterion/ecma402/unit_test.c \
diff --git a/src/ecma402/locale.cpp b/src/ecma402/locale.cpp
index 3abf234..267d3fe 100644
--- a/src/ecma402/locale.cpp
+++ b/src/ecma402/locale.cpp
@@ -608,6 +608,42 @@ int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus
   return getMaxOrMin(MINIMIZE, localeId, minimized, status, isCanonicalized);
 }
 
+int ecma402_validateAndCanonicalizeUnicodeLocaleId(const char *localeId, char *canonicalized,
+                                                   ecma402_errorStatus *status)
+{
+  // Fixed-size scratch buffers live on the stack, so no exit path can leak them.
+  char bestAvailable[ULOC_FULLNAME_CAPACITY];
+  char languageTag[ULOC_FULLNAME_CAPACITY];
+  int resultLength = -1;
+
+  if (localeId == nullptr || canonicalized == nullptr) {
+    return resultLength;
+  }
+
+  // NOTE(review): only the pointer array is released below; confirm whether
+  // ecma402_intlAvailableLocales() hands over ownership of the strings too.
+  char **available = static_cast<char **>(malloc(sizeof(char *) * uloc_countAvailable()));
+  if (available == nullptr) {
+    return resultLength;
+  }
+
+  const size_t total = ecma402_intlAvailableLocales(available);
+
+  if (ecma402_bestAvailableLocale(available, total, localeId, bestAvailable, false) > 0) {
+    const size_t length = languageTagForLocaleId(localeId, languageTag, status);
+
+    // Copy only on success so the caller's buffer is untouched on failure.
+    if (!ecma402_hasError(status) && length > 0) {
+      strcpy(canonicalized, languageTag);
+      resultLength = static_cast<int>(length);
+    }
+  }
+
+  free(available);
+
+  return resultLength;
+}
+
 namespace {
 
 int getHourCyclesForLocale(char *localeId, const char **values)
diff --git a/src/ecma402/locale.h b/src/ecma402/locale.h
index 7d3897c..5168aba 100644
--- a/src/ecma402/locale.h
+++ b/src/ecma402/locale.h
@@ -498,6 +498,27 @@ int ecma402_maximize(const char *localeId, char *maximized, ecma402_errorStatus
  */
 int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus *status, bool isCanonicalized);
 
+/**
+ * Returns the Unicode canonicalized locale identifier form of the locale ID
+ * after validating the locale is supported by this implementation (using
+ * ecma402_bestAvailableLocale()).
+ *
+ * The canonicalized parameter must point to a buffer the caller has already
+ * allocated with enough room for the result; it is written only on success.
+ * Typically, the buffer should hold ULOC_FULLNAME_CAPACITY chars. For example:
+ *
+ *     malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY)
+ *
+ * @param localeId The locale identifier to canonicalize.
+ * @param canonicalized A buffer in which to store the canonicalized name.
+ * @param status A status object to pass error messages back to the caller.
+ *
+ * @return The length of the string stored to the canonicalized buffer, or -1 if
+ * the localeId cannot be validated or canonicalized.
+ */
+int ecma402_validateAndCanonicalizeUnicodeLocaleId(const char *localeId, char *canonicalized,
+                                                   ecma402_errorStatus *status);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tests/criterion/ecma402/locale_validation_test.cpp b/tests/criterion/ecma402/locale_validation_test.cpp
new file mode 100644
index 0000000..004a2a4
--- /dev/null
+++ b/tests/criterion/ecma402/locale_validation_test.cpp
@@ -0,0 +1,65 @@
+#include "../test.h"
+
+#include "src/ecma402/locale.h"
+
+#include <unicode/uloc.h>
+
+#define TEST_SUITE ecma402LocaleValidation
+
+// NOLINTBEGIN(cert-err58-cpp, misc-const-correctness,
+// misc-use-anonymous-namespace)
+
+using string = std::basic_string<char, std::char_traits<char>, criterion::allocator<char>>;
+
+// One parameterized case: an input locale ID and the expected canonical tag,
+// or the literal "-1" when validation is expected to fail.
+struct localeIdValidationTest {
+  string localeId;
+  string expected;
+
+  localeIdValidationTest(string localeId, string expected) : localeId(localeId), expected(expected) {}
+};
+
+ParameterizedTestParameters(TEST_SUITE, validateAndCanonicalizeUnicodeLocaleId)
+{
+  static criterion::parameters<localeIdValidationTest> tests;
+
+  tests.emplace_back("en-US", "en-US");
+  tests.emplace_back("en_US", "en-US");
+  tests.emplace_back("en-Latn-US", "en-Latn-US");
+  tests.emplace_back("en-US-u-nu-latn-ca-gregory", "en-US-u-ca-gregory-nu-latn");
+  tests.emplace_back("foobar", "-1");
+  tests.emplace_back("zz-ZZ", "-1");
+  tests.emplace_back("und-u-va-posix", "-1");
+  tests.emplace_back("en-US-POSIX", "en-US-u-va-posix");
+  tests.emplace_back("en_US_POSIX", "en-US-u-va-posix");
+
+  return tests;
+}
+
+ParameterizedTest(struct localeIdValidationTest *test, TEST_SUITE, validateAndCanonicalizeUnicodeLocaleId)
+{
+  char *result;
+  int resultLength; // signed: the function reports failure as -1
+  ecma402_errorStatus *status;
+
+  status = ecma402_initErrorStatus();
+  result = static_cast<char *>(malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY));
+  resultLength = ecma402_validateAndCanonicalizeUnicodeLocaleId(test->localeId.c_str(), result, status);
+
+  cr_assert(eq(i8, ecma402_hasError(status), 0));
+
+  if (test->expected == "-1") {
+    cr_expect(eq(int, resultLength, -1));
+  } else {
+    cr_expect(eq(str, result, test->expected.c_str()),
+              "Expected canonicalized value of \"%s\" for language tag \"%s\"; got \"%s\" instead",
+              test->expected.c_str(), test->localeId.c_str(), result);
+    cr_expect(eq(int, resultLength, static_cast<int>(test->expected.length())));
+  }
+
+  free(result);
+}
+
+// NOLINTEND(cert-err58-cpp, misc-const-correctness,
+// misc-use-anonymous-namespace)