diff --git a/config.m4 b/config.m4 index 4933221..5dfb69e 100644 --- a/config.m4 +++ b/config.m4 @@ -146,6 +146,7 @@ if test "$PHP_ECMA_INTL" != "no"; then tests/criterion/ecma402/hour_cycle_test.c \ tests/criterion/ecma402/language_tag_test.cpp \ tests/criterion/ecma402/locale_canonicalization_test.c \ + tests/criterion/ecma402/locale_default_validation_test.cpp \ tests/criterion/ecma402/locale_getBaseName_test.c \ tests/criterion/ecma402/locale_getCalendar_test.c \ tests/criterion/ecma402/locale_getCaseFirst_test.c \ diff --git a/package.xml b/package.xml index 4169e8c..6a1eda5 100644 --- a/package.xml +++ b/package.xml @@ -136,6 +136,7 @@ Add Locale::$currency and Locale\Options::$currency properties. ECMA-402 does no + @@ -161,6 +162,12 @@ Add Locale::$currency and Locale\Options::$currency properties. ECMA-402 does no + + + + + + diff --git a/src/ecma402/locale.cpp b/src/ecma402/locale.cpp index 3abf234..55657ed 100644 --- a/src/ecma402/locale.cpp +++ b/src/ecma402/locale.cpp @@ -608,6 +608,36 @@ int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus return getMaxOrMin(MINIMIZE, localeId, minimized, status, isCanonicalized); } +int ecma402_validateAndCanonicalizeForDefaultLocaleId(const char *defaultLocaleId, char *result) +{ + char **available, *bestAvailable, *canonicalized; + size_t total, length, resultLength = -1; + ecma402_errorStatus *status; + + available = (char **)malloc(sizeof(char *) * uloc_countAvailable()); + bestAvailable = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); + total = ecma402_intlAvailableLocales(available); + + if (ecma402_bestAvailableLocale(available, total, defaultLocaleId, bestAvailable, false) > 0) { + status = ecma402_initErrorStatus(); + canonicalized = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); + length = languageTagForLocaleId(defaultLocaleId, canonicalized, status); + + if (!ecma402_hasError(status) && length > 0) { + strcpy(result, canonicalized); + resultLength = length; + } + 
+ free(canonicalized); + ecma402_freeErrorStatus(status); + } + + free(bestAvailable); + free(available); + + return resultLength; +} + namespace { int getHourCyclesForLocale(char *localeId, const char **values) diff --git a/src/ecma402/locale.h b/src/ecma402/locale.h index 7d3897c..721c5d5 100644 --- a/src/ecma402/locale.h +++ b/src/ecma402/locale.h @@ -498,6 +498,25 @@ int ecma402_maximize(const char *localeId, char *maximized, ecma402_errorStatus */ int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus *status, bool isCanonicalized); +/** + * Returns a canonicalized locale ID after validating the locale is supported + * by this implementation (using ecma402_bestAvailableLocale()). + * + * The result buffer must already be allocated by the caller with enough + * room for the canonicalized ID and its terminating NUL. Typically, this + * should use ULOC_FULLNAME_CAPACITY. For example: + * + * malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY) + * + * @param defaultLocaleId The locale ID intended for use as the default locale. + * @param result A caller-allocated buffer in which to store the validated and + * canonicalized default locale; it is only written on success. + * + * @return The length of the string stored to the result buffer, or -1 if the + * default locale ID could not be validated or canonicalized. + */ +int ecma402_validateAndCanonicalizeForDefaultLocaleId(const char *defaultLocaleId, char *result); + #ifdef __cplusplus } #endif diff --git a/src/php/ecma_intl.c b/src/php/ecma_intl.c index 88538f1..42c7434 100644 --- a/src/php/ecma_intl.c +++ b/src/php/ecma_intl.c @@ -35,55 +35,41 @@ ZEND_DECLARE_MODULE_GLOBALS(ecma_intl) -/** - * Validates whether the locale provided in the ecma_intl.default_locale INI - * setting is supported by this implementation. If so, it stores the canonicalized - * BCP 47 version of the language tag to a global setting. 
- */ -ZEND_INI_MH(onUpdateLocale) +const char *ecma_defaultLocale(void) { - if (!new_value || (new_value && !ZSTR_VAL(new_value)[0])) { - return FAILURE; - } - - char **p = (char **)ZEND_INI_GET_ADDR(); - char **available, *bestAvailable, *canonicalized; - size_t total, length; - zend_result result = FAILURE; - ecma402_errorStatus *status; - - available = (char **)emalloc(sizeof(char *) * uloc_countAvailable()); - bestAvailable = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); - total = ecma402_intlAvailableLocales(available); - - if (ecma402_bestAvailableLocale(available, total, ZSTR_VAL(new_value), bestAvailable, false) > 0) { - status = ecma402_initErrorStatus(); - canonicalized = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); - length = ecma402_canonicalizeUnicodeLocaleId(ZSTR_VAL(new_value), canonicalized, status); - - if (!ecma402_hasError(status) && length > 0) { - strcpy(*p, canonicalized); - result = SUCCESS; + if (ECMA_INTL_G(defaultLocale) == NULL || strcmp(ECMA_INTL_G(defaultLocale), "") == 0) { + char *ini = INI_STR(PHP_ECMA_INI_DEFAULT_LOCALE); + if (ini == NULL || strcmp(ini, "") == 0) { + // Fall back to ICU default, if we don't have a default locale set. + ini = (char *)uloc_getDefault(); } - efree(canonicalized); - ecma402_freeErrorStatus(status); + // Let's check again to make sure ICU gave us a value. + if (ini == NULL || strcmp(ini, "") == 0) { + // If all else fails, use "en" as the default locale. This isn't + // perfect, but it ensures we have at least something. 
+ strcpy(ECMA_INTL_G(defaultLocale), "en"); + } else { + char *defaultLocaleId = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); + if (ecma402_validateAndCanonicalizeForDefaultLocaleId(ini, defaultLocaleId) > 0) { + strcpy(ECMA_INTL_G(defaultLocale), defaultLocaleId); + } + efree(defaultLocaleId); + } } - efree(bestAvailable); - efree(available); - - return result; + return ECMA_INTL_G(defaultLocale); } +static PHP_GINIT_FUNCTION(ecma_intl); +static PHP_GSHUTDOWN_FUNCTION(ecma_intl); +static ZEND_INI_MH(onUpdateLocale); + PHP_INI_BEGIN() -STD_PHP_INI_ENTRY("ecma_intl.default_locale", NULL, PHP_INI_ALL, onUpdateLocale, defaultLocale, zend_ecma_intl_globals, +STD_PHP_INI_ENTRY(PHP_ECMA_INI_DEFAULT_LOCALE, NULL, PHP_INI_ALL, onUpdateLocale, defaultLocale, zend_ecma_intl_globals, ecma_intl_globals) PHP_INI_END() -static PHP_GINIT_FUNCTION(ecma_intl); -static PHP_GSHUTDOWN_FUNCTION(ecma_intl); - zend_module_entry ecma_intl_module_entry = {STANDARD_MODULE_HEADER, "ecma_intl", NULL, @@ -106,17 +92,6 @@ ZEND_TSRMLS_CACHE_DEFINE() ZEND_GET_MODULE(ecma_intl) #endif -static PHP_GINIT_FUNCTION(ecma_intl) -{ - ZEND_SECURE_ZERO(ecma_intl_globals, sizeof(zend_ecma_intl_globals)); - ecma_intl_globals->defaultLocale = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); -} - -static PHP_GSHUTDOWN_FUNCTION(ecma_intl) -{ - efree(ecma_intl_globals->defaultLocale); -} - PHP_MINIT_FUNCTION(ecma_intl_all) { REGISTER_INI_ENTRIES(); @@ -172,3 +147,35 @@ PHP_MINFO_FUNCTION(ecma_intl) DISPLAY_INI_ENTRIES(); } + +static PHP_GINIT_FUNCTION(ecma_intl) +{ + ZEND_SECURE_ZERO(ecma_intl_globals, sizeof(zend_ecma_intl_globals)); + ecma_intl_globals->defaultLocale = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); +} + +static PHP_GSHUTDOWN_FUNCTION(ecma_intl) +{ + efree(ecma_intl_globals->defaultLocale); +} + +static ZEND_INI_MH(onUpdateLocale) +{ + zend_result result = FAILURE; + + if (!new_value || (new_value && !ZSTR_VAL(new_value)[0])) { + return result; + } + + char **p = 
(char **)ZEND_INI_GET_ADDR(); + char *defaultLocaleId = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); + + if (ecma402_validateAndCanonicalizeForDefaultLocaleId(ZSTR_VAL(new_value), defaultLocaleId) > 0) { + strcpy(*p, defaultLocaleId); + result = SUCCESS; + } + + efree(defaultLocaleId); + + return result; +} diff --git a/src/php/ecma_intl.h b/src/php/ecma_intl.h index 0c91435..4779290 100644 --- a/src/php/ecma_intl.h +++ b/src/php/ecma_intl.h @@ -19,6 +19,7 @@ extern zend_module_entry ecma_intl_module_entry; #define phpext_ecma_intl_ptr &ecma_intl_module_entry #define PHP_ECMA_INTL_VERSION "0.3.0-dev" +#define PHP_ECMA_INI_DEFAULT_LOCALE "ecma_intl.default_locale" ZEND_BEGIN_MODULE_GLOBALS(ecma_intl) char *defaultLocale; diff --git a/tests/criterion/ecma402/locale_default_validation_test.cpp b/tests/criterion/ecma402/locale_default_validation_test.cpp new file mode 100644 index 0000000..dcfec5a --- /dev/null +++ b/tests/criterion/ecma402/locale_default_validation_test.cpp @@ -0,0 +1,62 @@ +#include "../test.h" + +#include "src/ecma402/locale.h" + +#include + +#define TEST_SUITE ecma402LocaleDefaultValidation + +// NOLINTBEGIN(cert-err58-cpp, misc-const-correctness, +// misc-use-anonymous-namespace) + +using string = std::basic_string, criterion::allocator>; + +struct defaultLocaleIdTest { + string defaultLocaleId; + string expected; + size_t expectedLength; + + defaultLocaleIdTest(string defaultLocaleId, string expected, size_t expectedLength) + : defaultLocaleId(defaultLocaleId), expected(expected), expectedLength(expectedLength) + {} +}; + +ParameterizedTestParameters(TEST_SUITE, validateAndCanonicalizeForDefaultLocaleId) +{ + static criterion::parameters tests; + + tests.emplace_back("en-US", "en-US", 5); + tests.emplace_back("en_US", "en-US", 5); + tests.emplace_back("en-Latn-US", "en-Latn-US", 10); + tests.emplace_back("en-US-u-nu-latn-ca-gregory", "en-US-u-ca-gregory-nu-latn", 26); + tests.emplace_back("foobar", "", -1); + 
+ tests.emplace_back("zz-ZZ", "", -1); + tests.emplace_back("und-u-va-posix", "", -1); + tests.emplace_back("en-US-POSIX", "en-US-u-va-posix", 16); + tests.emplace_back("en_US_POSIX", "en-US-u-va-posix", 16); + + return tests; +} + +ParameterizedTest(struct defaultLocaleIdTest *test, TEST_SUITE, validateAndCanonicalizeForDefaultLocaleId) +{ + char *result; + size_t resultLength; + + result = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY); + + resultLength = ecma402_validateAndCanonicalizeForDefaultLocaleId(test->defaultLocaleId.c_str(), result); + + cr_expect(eq(i8, resultLength, test->expectedLength)); + + if (test->expectedLength > 0) { + cr_expect(eq(str, result, test->expected.c_str())); + } else { + cr_expect(eq(ptr, result, nullptr)); + } + + free(result); +} + +// NOLINTEND(cert-err58-cpp, misc-const-correctness, +// misc-use-anonymous-namespace) diff --git a/tests/phpt/ini-default_locale-005.phpt b/tests/phpt/ini-default_locale-005.phpt index 5e42935..b8b5dde 100644 --- a/tests/phpt/ini-default_locale-005.phpt +++ b/tests/phpt/ini-default_locale-005.phpt @@ -1,5 +1,5 @@ --TEST-- -ecma_intl.default_locale cannot recognize underscores in locale IDs +ecma_intl.default_locale can recognize underscores in locale IDs --EXTENSIONS-- ecma_intl --INI-- @@ -11,4 +11,4 @@ declare(strict_types=1); var_dump(ini_get('ecma_intl.default_locale')); --EXPECT-- -string(0) "" +string(11) "en_US_POSIX" diff --git a/tests/phpt/ini-default_locale-006.phpt b/tests/phpt/ini-default_locale-006.phpt index 6c7848a..c0638dd 100644 --- a/tests/phpt/ini-default_locale-006.phpt +++ b/tests/phpt/ini-default_locale-006.phpt @@ -9,16 +9,20 @@ ecma_intl.default_locale= declare(strict_types=1); var_dump(ini_set('ecma_intl.default_locale', 'en-US')); +var_dump(ini_set('ecma_intl.default_locale', 'en_US')); var_dump(ini_set('ecma_intl.default_locale', 'foobar')); var_dump(ini_set('ecma_intl.default_locale', 'en-Latn-US')); var_dump(ini_set('ecma_intl.default_locale', 'en-US-POSIX')); 
+var_dump(ini_set('ecma_intl.default_locale', 'en_US_POSIX')); var_dump(ini_set('ecma_intl.default_locale', 'de')); var_dump(ini_get('ecma_intl.default_locale')); --EXPECT-- string(0) "" -bool(false) string(5) "en-US" +bool(false) +string(5) "en_US" string(10) "en-Latn-US" string(11) "en-US-POSIX" +string(11) "en_US_POSIX" string(2) "de"