Skip to content

Commit

Permalink
feat: add library method to validate and canonicalize locale ID
Browse files Browse the repository at this point in the history
  • Loading branch information
ramsey committed Oct 12, 2023
1 parent 1670424 commit f79edc5
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 0 deletions.
1 change: 1 addition & 0 deletions config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ if test "$PHP_ECMA_INTL" != "no"; then
tests/criterion/ecma402/locale_min_max_test.c \
tests/criterion/ecma402/locale_options_test.c \
tests/criterion/ecma402/locale_test.c \
tests/criterion/ecma402/locale_validation_test.cpp \
tests/criterion/ecma402/numbering_system_test.c \
tests/criterion/ecma402/time_zone_test.c \
tests/criterion/ecma402/unit_test.c \
Expand Down
28 changes: 28 additions & 0 deletions src/ecma402/locale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,34 @@ int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus
return getMaxOrMin(MINIMIZE, localeId, minimized, status, isCanonicalized);
}

int ecma402_validateAndCanonicalizeUnicodeLocaleId(const char *localeId, char *canonicalized,
ecma402_errorStatus *status)
{
char **available, *bestAvailable, *tmp;
size_t total, length, resultLength = -1;

available = (char **)malloc(sizeof(char *) * uloc_countAvailable());
bestAvailable = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
total = ecma402_intlAvailableLocales(available);

if (ecma402_bestAvailableLocale(available, total, localeId, bestAvailable, false) > 0) {
tmp = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
length = languageTagForLocaleId(localeId, tmp, status);

if (!ecma402_hasError(status) && length > 0) {
strcpy(canonicalized, tmp);
resultLength = length;
}

free(tmp);
}

free(bestAvailable);
free(available);

return resultLength;
}

namespace {

int getHourCyclesForLocale(char *localeId, const char **values)
Expand Down
21 changes: 21 additions & 0 deletions src/ecma402/locale.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,27 @@ int ecma402_maximize(const char *localeId, char *maximized, ecma402_errorStatus
*/
int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus *status, bool isCanonicalized);

/**
 * Returns the Unicode canonicalized locale identifier form of the locale ID
 * after validating the locale is supported by this implementation (using
 * ecma402_bestAvailableLocale()).
 *
 * The canonicalized parameter must already be allocated by the caller (on
 * the stack or the heap) with enough room for the result and its
 * terminating null character. Typically, this should use
 * ULOC_FULLNAME_CAPACITY. For example:
 *
 *     char canonicalized[ULOC_FULLNAME_CAPACITY];
 *
 * or:
 *
 *     malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY)
 *
 * The buffer is written only when the call succeeds; when -1 is returned
 * its contents are left untouched.
 *
 * @param localeId The locale identifier to canonicalize.
 * @param canonicalized A buffer in which to store the canonicalized name.
 * @param status A status object to pass error messages back to the caller.
 *
 * @return The length of the string stored to the canonicalized buffer, or -1 if
 * the localeId cannot be validated or canonicalized.
 */
int ecma402_validateAndCanonicalizeUnicodeLocaleId(const char *localeId, char *canonicalized,
ecma402_errorStatus *status);

#ifdef __cplusplus
}
#endif
Expand Down
63 changes: 63 additions & 0 deletions tests/criterion/ecma402/locale_validation_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include "../test.h"

#include "src/ecma402/locale.h"

#include <unicode/uloc.h>

#define TEST_SUITE ecma402LocaleValidation

// NOLINTBEGIN(cert-err58-cpp, misc-const-correctness,
// misc-use-anonymous-namespace)

// String type backed by criterion::allocator instead of the default
// allocator. Presumably required so that parameter data created in the
// ParameterizedTestParameters function remains usable inside each test run;
// confirm against Criterion's parameterized-test documentation.
using string = std::basic_string<char, std::char_traits<char>, criterion::allocator<char>>;

struct localeIdValidationTest {
string localeId;
string expected;

localeIdValidationTest(string localeId, string expected) : localeId(localeId), expected(expected) {}
};

ParameterizedTestParameters(TEST_SUITE, validateAndCanonicalizeUnicodeLocaleId)
{
    // Each row is {input locale ID, expected canonical tag}; an expected
    // value of "-1" means the input must be rejected.
    static const char *const cases[][2] = {
        {"en-US", "en-US"},
        {"en_US", "en-US"},
        {"en-Latn-US", "en-Latn-US"},
        {"en-US-u-nu-latn-ca-gregory", "en-US-u-ca-gregory-nu-latn"},
        {"foobar", "-1"},
        {"zz-ZZ", "-1"},
        {"und-u-va-posix", "-1"},
        {"en-US-POSIX", "en-US-u-va-posix"},
        {"en_US_POSIX", "en-US-u-va-posix"},
    };

    static criterion::parameters<struct localeIdValidationTest> tests;

    for (const auto &row : cases) {
        tests.emplace_back(row[0], row[1]);
    }

    return tests;
}

ParameterizedTest(struct localeIdValidationTest *test, TEST_SUITE, validateAndCanonicalizeUnicodeLocaleId)
{
char *result;
size_t resultLength;
ecma402_errorStatus *status;

status = ecma402_initErrorStatus();
result = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
resultLength = ecma402_validateAndCanonicalizeUnicodeLocaleId(test->localeId.c_str(), result, status);

cr_assert(eq(i8, ecma402_hasError(status), 0));

if (test->expected == "-1") {
cr_expect(eq(i8, resultLength, -1));
} else {
cr_expect(eq(str, result, test->expected.c_str()),
"Expected canonicalized value of \"%s\" for language tag \"%s\"; got \"%s\" instead",
test->expected.c_str(), test->localeId.c_str(), result);
cr_expect(eq(i8, resultLength, test->expected.length()));
}

free(result);
}

// NOLINTEND(cert-err58-cpp, misc-const-correctness,
// misc-use-anonymous-namespace)

0 comments on commit f79edc5

Please sign in to comment.