diff --git a/config.m4 b/config.m4
index 4933221..5dfb69e 100644
--- a/config.m4
+++ b/config.m4
@@ -146,6 +146,7 @@ if test "$PHP_ECMA_INTL" != "no"; then
tests/criterion/ecma402/hour_cycle_test.c \
tests/criterion/ecma402/language_tag_test.cpp \
tests/criterion/ecma402/locale_canonicalization_test.c \
+ tests/criterion/ecma402/locale_default_validation_test.cpp \
tests/criterion/ecma402/locale_getBaseName_test.c \
tests/criterion/ecma402/locale_getCalendar_test.c \
tests/criterion/ecma402/locale_getCaseFirst_test.c \
diff --git a/package.xml b/package.xml
index 4169e8c..6a1eda5 100644
--- a/package.xml
+++ b/package.xml
@@ -136,6 +136,7 @@ Add Locale::$currency and Locale\Options::$currency properties. ECMA-402 does no
+
@@ -161,6 +162,12 @@ Add Locale::$currency and Locale\Options::$currency properties. ECMA-402 does no
+
+
+
+
+
+
diff --git a/src/ecma402/locale.cpp b/src/ecma402/locale.cpp
index 3abf234..55657ed 100644
--- a/src/ecma402/locale.cpp
+++ b/src/ecma402/locale.cpp
@@ -608,6 +608,36 @@ int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus
return getMaxOrMin(MINIMIZE, localeId, minimized, status, isCanonicalized);
}
+/*
+ * Checks that defaultLocaleId is supported by this implementation (via
+ * ecma402_bestAvailableLocale()) and, if so, copies its language tag form
+ * into result.
+ *
+ * Returns the length of the string written to result, or -1 when an argument
+ * is missing, memory cannot be allocated, the locale is not supported, or
+ * the language tag conversion reports an error. result is only written to
+ * on success.
+ */
+int ecma402_validateAndCanonicalizeForDefaultLocaleId(const char *defaultLocaleId, char *result)
+{
+    // Kept as an int so the -1 failure sentinel matches the declared return
+    // type instead of round-tripping through an unsigned size_t.
+    int resultLength = -1;
+
+    if (defaultLocaleId == nullptr || result == nullptr) {
+        return resultLength;
+    }
+
+    char **available = (char **)malloc(sizeof(char *) * uloc_countAvailable());
+    char *bestAvailable = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
+
+    if (available != nullptr && bestAvailable != nullptr) {
+        size_t total = ecma402_intlAvailableLocales(available);
+
+        // bestAvailable only receives the match; what gets canonicalized
+        // below is the caller's original locale ID.
+        if (ecma402_bestAvailableLocale(available, total, defaultLocaleId, bestAvailable, false) > 0) {
+            ecma402_errorStatus *status = ecma402_initErrorStatus();
+            char *canonicalized = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
+
+            if (canonicalized != nullptr) {
+                size_t length = languageTagForLocaleId(defaultLocaleId, canonicalized, status);
+
+                if (!ecma402_hasError(status) && length > 0) {
+                    strcpy(result, canonicalized);
+                    resultLength = (int)length;
+                }
+            }
+
+            free(canonicalized);
+            ecma402_freeErrorStatus(status);
+        }
+    }
+
+    // free(NULL) is a no-op, so both buffers can be released unconditionally.
+    free(bestAvailable);
+    free(available);
+
+    return resultLength;
+}
+
namespace {
int getHourCyclesForLocale(char *localeId, const char **values)
diff --git a/src/ecma402/locale.h b/src/ecma402/locale.h
index 7d3897c..721c5d5 100644
--- a/src/ecma402/locale.h
+++ b/src/ecma402/locale.h
@@ -498,6 +498,25 @@ int ecma402_maximize(const char *localeId, char *maximized, ecma402_errorStatus
*/
int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus *status, bool isCanonicalized);
+/**
+ * Returns a canonicalized locale ID after validating the locale is supported
+ * by this implementation (using ecma402_bestAvailableLocale()).
+ *
+ * The result parameter must already be allocated by the caller with enough
+ * memory to store the canonicalized locale ID; the caller retains ownership
+ * of the buffer. Typically, this should use ULOC_FULLNAME_CAPACITY. For
+ * example:
+ *
+ *     malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY)
+ *
+ * The result buffer is written to only on success, so callers must not rely
+ * on its contents when this function returns -1.
+ *
+ * @param defaultLocaleId The locale ID intended for use as the default locale.
+ * @param result A buffer in which to store the validated and canonicalized
+ * default locale.
+ *
+ * @return The length of the string stored to the result buffer, or -1 if the
+ * default locale ID could not be validated or canonicalized.
+ */
+int ecma402_validateAndCanonicalizeForDefaultLocaleId(const char *defaultLocaleId, char *result);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/php/ecma_intl.c b/src/php/ecma_intl.c
index 88538f1..42c7434 100644
--- a/src/php/ecma_intl.c
+++ b/src/php/ecma_intl.c
@@ -35,55 +35,41 @@
ZEND_DECLARE_MODULE_GLOBALS(ecma_intl)
-/**
- * Validates whether the locale provided in the ecma_intl.default_locale INI
- * setting is supported by this implementation. If so, it stores the canonicalized
- * BCP 47 version of the language tag to a global setting.
- */
-ZEND_INI_MH(onUpdateLocale)
+/**
+ * Returns the default locale for this extension, resolving it on first use.
+ *
+ * When the module-global default locale is still empty, candidates are tried
+ * in this order: the ecma_intl.default_locale INI value, then ICU's
+ * uloc_getDefault(), then the literal "en". A candidate is stored in the
+ * module globals only after it passes
+ * ecma402_validateAndCanonicalizeForDefaultLocaleId(); a candidate that fails
+ * validation also falls back to "en", so the returned string is never empty.
+ *
+ * @return The default locale. The string is owned by the extension and must
+ * not be freed or modified by the caller.
+ */
+const char *ecma_defaultLocale(void)
{
- if (!new_value || (new_value && !ZSTR_VAL(new_value)[0])) {
- return FAILURE;
- }
-
- char **p = (char **)ZEND_INI_GET_ADDR();
- char **available, *bestAvailable, *canonicalized;
- size_t total, length;
- zend_result result = FAILURE;
- ecma402_errorStatus *status;
-
- available = (char **)emalloc(sizeof(char *) * uloc_countAvailable());
- bestAvailable = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
- total = ecma402_intlAvailableLocales(available);
-
- if (ecma402_bestAvailableLocale(available, total, ZSTR_VAL(new_value), bestAvailable, false) > 0) {
- status = ecma402_initErrorStatus();
- canonicalized = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
- length = ecma402_canonicalizeUnicodeLocaleId(ZSTR_VAL(new_value), canonicalized, status);
-
- if (!ecma402_hasError(status) && length > 0) {
- strcpy(*p, canonicalized);
- result = SUCCESS;
+    // Without a buffer there is nowhere to cache a value, so hand back the
+    // last-resort locale instead of copying into a NULL pointer.
+    if (ECMA_INTL_G(defaultLocale) == NULL) {
+        return "en";
+    }
+
+    if (strcmp(ECMA_INTL_G(defaultLocale), "") == 0) {
+        char *ini = INI_STR(PHP_ECMA_INI_DEFAULT_LOCALE);
+        if (ini == NULL || strcmp(ini, "") == 0) {
+            // Fall back to ICU default, if we don't have a default locale set.
+            ini = (char *)uloc_getDefault();
}
- efree(canonicalized);
- ecma402_freeErrorStatus(status);
+        // Let's check again to make sure ICU gave us a value.
+        if (ini == NULL || strcmp(ini, "") == 0) {
+            // If all else fails, use "en" as the default locale. This isn't
+            // perfect, but it ensures we have at least something.
+            strcpy(ECMA_INTL_G(defaultLocale), "en");
+        } else {
+            char *defaultLocaleId = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
+            if (ecma402_validateAndCanonicalizeForDefaultLocaleId(ini, defaultLocaleId) > 0) {
+                strcpy(ECMA_INTL_G(defaultLocale), defaultLocaleId);
+            } else {
+                // The candidate is not a locale this implementation supports;
+                // use the same last-resort value rather than leaving the
+                // default empty.
+                strcpy(ECMA_INTL_G(defaultLocale), "en");
+            }
+            efree(defaultLocaleId);
+        }
}
- efree(bestAvailable);
- efree(available);
-
- return result;
+    return ECMA_INTL_G(defaultLocale);
}
+// Forward declarations: these handlers are defined at the end of this file,
+// but onUpdateLocale is referenced by the INI entry table below.
+static PHP_GINIT_FUNCTION(ecma_intl);
+static PHP_GSHUTDOWN_FUNCTION(ecma_intl);
+static ZEND_INI_MH(onUpdateLocale);
+
PHP_INI_BEGIN()
-STD_PHP_INI_ENTRY("ecma_intl.default_locale", NULL, PHP_INI_ALL, onUpdateLocale, defaultLocale, zend_ecma_intl_globals,
+STD_PHP_INI_ENTRY(PHP_ECMA_INI_DEFAULT_LOCALE, NULL, PHP_INI_ALL, onUpdateLocale, defaultLocale, zend_ecma_intl_globals,
ecma_intl_globals)
PHP_INI_END()
-static PHP_GINIT_FUNCTION(ecma_intl);
-static PHP_GSHUTDOWN_FUNCTION(ecma_intl);
-
zend_module_entry ecma_intl_module_entry = {STANDARD_MODULE_HEADER,
"ecma_intl",
NULL,
@@ -106,17 +92,6 @@ ZEND_TSRMLS_CACHE_DEFINE()
ZEND_GET_MODULE(ecma_intl)
#endif
-static PHP_GINIT_FUNCTION(ecma_intl)
-{
- ZEND_SECURE_ZERO(ecma_intl_globals, sizeof(zend_ecma_intl_globals));
- ecma_intl_globals->defaultLocale = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
-}
-
-static PHP_GSHUTDOWN_FUNCTION(ecma_intl)
-{
- efree(ecma_intl_globals->defaultLocale);
-}
-
PHP_MINIT_FUNCTION(ecma_intl_all)
{
REGISTER_INI_ENTRIES();
@@ -172,3 +147,35 @@ PHP_MINFO_FUNCTION(ecma_intl)
DISPLAY_INI_ENTRIES();
}
+
+/**
+ * Initializes the module globals and allocates the buffer that holds the
+ * default locale.
+ *
+ * The buffer is allocated persistently because module globals outlive any
+ * single request, and it is zero-filled so that it reads as an empty string
+ * until a default locale has been stored in it.
+ */
+static PHP_GINIT_FUNCTION(ecma_intl)
+{
+    ZEND_SECURE_ZERO(ecma_intl_globals, sizeof(zend_ecma_intl_globals));
+    ecma_intl_globals->defaultLocale = (char *)pecalloc(ULOC_FULLNAME_CAPACITY, sizeof(char), 1);
+}
+
+/**
+ * Releases the default locale buffer allocated by the globals initializer.
+ */
+static PHP_GSHUTDOWN_FUNCTION(ecma_intl)
+{
+    pefree(ecma_intl_globals->defaultLocale, 1);
+    ecma_intl_globals->defaultLocale = NULL;
+}
+
+/**
+ * INI modification handler for the ecma_intl.default_locale setting.
+ *
+ * Rejects a missing or empty value. Otherwise the new value is passed to
+ * ecma402_validateAndCanonicalizeForDefaultLocaleId(); on success the
+ * canonicalized locale is copied into the buffer behind the INI entry's
+ * global address and SUCCESS is returned. In every other case the handler
+ * returns FAILURE and leaves the stored value untouched.
+ */
+static ZEND_INI_MH(onUpdateLocale)
+{
+    if (new_value == NULL || ZSTR_VAL(new_value)[0] == '\0') {
+        return FAILURE;
+    }
+
+    char **target = (char **)ZEND_INI_GET_ADDR();
+    char *validated = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
+    zend_result outcome = FAILURE;
+
+    if (ecma402_validateAndCanonicalizeForDefaultLocaleId(ZSTR_VAL(new_value), validated) > 0) {
+        strcpy(*target, validated);
+        outcome = SUCCESS;
+    }
+
+    efree(validated);
+
+    return outcome;
+}
diff --git a/src/php/ecma_intl.h b/src/php/ecma_intl.h
index 0c91435..4779290 100644
--- a/src/php/ecma_intl.h
+++ b/src/php/ecma_intl.h
@@ -19,6 +19,7 @@ extern zend_module_entry ecma_intl_module_entry;
#define phpext_ecma_intl_ptr &ecma_intl_module_entry
#define PHP_ECMA_INTL_VERSION "0.3.0-dev"
+#define PHP_ECMA_INI_DEFAULT_LOCALE "ecma_intl.default_locale"
ZEND_BEGIN_MODULE_GLOBALS(ecma_intl)
char *defaultLocale;
diff --git a/tests/criterion/ecma402/locale_default_validation_test.cpp b/tests/criterion/ecma402/locale_default_validation_test.cpp
new file mode 100644
index 0000000..dcfec5a
--- /dev/null
+++ b/tests/criterion/ecma402/locale_default_validation_test.cpp
@@ -0,0 +1,62 @@
+#include "../test.h"
+
+#include "src/ecma402/locale.h"
+
+#include
+
+#define TEST_SUITE ecma402LocaleDefaultValidation
+
+// NOLINTBEGIN(cert-err58-cpp, misc-const-correctness,
+// misc-use-anonymous-namespace)
+
+// String type backed by Criterion's allocator, used for the members of the
+// parameter struct handed to the parameterized test.
+using string = std::basic_string<char, std::char_traits<char>, criterion::allocator<char>>;
+
+// One parameterized test case: the locale ID passed to the function under
+// test, the canonicalized string expected back, and the expected return
+// value. expectedLength is a size_t, so failure cases written as -1 are
+// stored as the wrapped unsigned value.
+struct defaultLocaleIdTest {
+    string defaultLocaleId;
+    string expected;
+    size_t expectedLength;
+
+    defaultLocaleIdTest(string defaultLocaleId, string expected, size_t expectedLength)
+        : defaultLocaleId(defaultLocaleId), expected(expected), expectedLength(expectedLength)
+    {}
+};
+
+// Builds the parameter list for the validateAndCanonicalizeForDefaultLocaleId
+// test. Each entry is (input locale ID, expected result, expected length); an
+// expected length of -1 marks an input the function is expected to reject.
+ParameterizedTestParameters(TEST_SUITE, validateAndCanonicalizeForDefaultLocaleId)
+{
+    static criterion::parameters<defaultLocaleIdTest> tests;
+
+    // Inputs expected to be accepted, in both hyphen and underscore forms.
+    tests.emplace_back("en-US", "en-US", 5);
+    tests.emplace_back("en_US", "en-US", 5);
+    tests.emplace_back("en-Latn-US", "en-Latn-US", 10);
+    tests.emplace_back("en-US-u-nu-latn-ca-gregory", "en-US-u-ca-gregory-nu-latn", 26);
+
+    // Inputs expected to be rejected.
+    tests.emplace_back("foobar", "", -1);
+    tests.emplace_back("zz-ZZ", "", -1);
+    tests.emplace_back("und-u-va-posix", "", -1);
+
+    // The POSIX variant is expected to come back as a Unicode extension.
+    tests.emplace_back("en-US-POSIX", "en-US-u-va-posix", 16);
+    tests.emplace_back("en_US_POSIX", "en-US-u-va-posix", 16);
+
+    return tests;
+}
+
+// Runs one case: the return value must match the expected length, and for
+// cases expected to succeed the result buffer must hold the expected string.
+ParameterizedTest(struct defaultLocaleIdTest *test, TEST_SUITE, validateAndCanonicalizeForDefaultLocaleId)
+{
+    // Convert to a signed value so the -1 failure marker can be told apart
+    // from a real length; compared as unsigned, "> 0" would always be true.
+    const int expectedLength = static_cast<int>(test->expectedLength);
+
+    char *result = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
+
+    const int resultLength
+        = ecma402_validateAndCanonicalizeForDefaultLocaleId(test->defaultLocaleId.c_str(), result);
+
+    cr_expect(eq(int, resultLength, expectedLength));
+
+    // The buffer is only inspected when the call reported success; after a
+    // failure it is still uninitialized and must not be read as a string.
+    if (expectedLength > 0 && resultLength > 0) {
+        cr_expect(eq(str, result, test->expected.c_str()));
+    }
+
+    free(result);
+}
+
+// NOLINTEND(cert-err58-cpp, misc-const-correctness,
+// misc-use-anonymous-namespace)
diff --git a/tests/phpt/ini-default_locale-005.phpt b/tests/phpt/ini-default_locale-005.phpt
index 5e42935..b8b5dde 100644
--- a/tests/phpt/ini-default_locale-005.phpt
+++ b/tests/phpt/ini-default_locale-005.phpt
@@ -1,5 +1,5 @@
--TEST--
-ecma_intl.default_locale cannot recognize underscores in locale IDs
+ecma_intl.default_locale can recognize underscores in locale IDs
--EXTENSIONS--
ecma_intl
--INI--
@@ -11,4 +11,4 @@ declare(strict_types=1);
var_dump(ini_get('ecma_intl.default_locale'));
--EXPECT--
-string(0) ""
+string(11) "en_US_POSIX"
diff --git a/tests/phpt/ini-default_locale-006.phpt b/tests/phpt/ini-default_locale-006.phpt
index 6c7848a..c0638dd 100644
--- a/tests/phpt/ini-default_locale-006.phpt
+++ b/tests/phpt/ini-default_locale-006.phpt
@@ -9,16 +9,20 @@ ecma_intl.default_locale=
declare(strict_types=1);
var_dump(ini_set('ecma_intl.default_locale', 'en-US'));
+var_dump(ini_set('ecma_intl.default_locale', 'en_US'));
var_dump(ini_set('ecma_intl.default_locale', 'foobar'));
var_dump(ini_set('ecma_intl.default_locale', 'en-Latn-US'));
var_dump(ini_set('ecma_intl.default_locale', 'en-US-POSIX'));
+var_dump(ini_set('ecma_intl.default_locale', 'en_US_POSIX'));
var_dump(ini_set('ecma_intl.default_locale', 'de'));
var_dump(ini_get('ecma_intl.default_locale'));
--EXPECT--
string(0) ""
-bool(false)
string(5) "en-US"
+bool(false)
+string(5) "en_US"
string(10) "en-Latn-US"
string(11) "en-US-POSIX"
+string(11) "en_US_POSIX"
string(2) "de"