[libc++] Fix support for multibyte thousands_sep and decimal_point in moneypunct_byname and numpunct_byname.

Summary:
The underlying C locales provide the `thousands_sep` and `decimal_point` as strings, possibly with more than one character (e.g. a multibyte UTF-8 sequence). We currently don't handle this case, even for `wchar_t`.

This patch properly converts the multibyte string to a wide character for `moneypunct_byname<wchar_t>`. For the `moneypunct_byname<char>` case, we attempt to narrow the wide character, and if that fails we also attempt to translate it to some reasonable value. For example, we translate U00A0 (non-breaking space) into U0020 (regular space). If none of these conversions succeed, then we simply allow the base class to provide a fallback value.


Reviewers: mclow.lists, EricWF

Subscribers: vangyzen, george.burgess.iv, cfe-commits

Differential Revision: https://reviews.llvm.org/D24218

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@289347 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Fiselier
2016-12-11 00:20:59 +00:00
parent eae7e51a7d
commit 3e99c8b0af
5 changed files with 140 additions and 70 deletions

View File

@@ -4183,6 +4183,54 @@ __widen_from_utf8<32>::~__widen_from_utf8()
{ {
} }
// Converts the leading multibyte character of the NUL-terminated string
// `ptr` into a wide character, interpreting the bytes according to `loc`.
//
// On success the converted character is stored in `dest` and true is
// returned. If `ptr` is empty, or the conversion call reports
// static_cast<size_t>(-1) or static_cast<size_t>(-2), `dest` is left
// untouched and false is returned.
static bool checked_string_to_wchar_convert(wchar_t& dest,
                                            const char* ptr,
                                            __locale_struct* loc) {
  // An empty string has no character to convert.
  if (ptr[0] == '\0')
    return false;

  // Start from a zero-initialized conversion state.
  mbstate_t state = {};
  wchar_t converted;
  const size_t consumed =
      __libcpp_mbrtowc_l(&converted, ptr, strlen(ptr), &state, loc);

  // NOTE(review): presumably these mirror mbrtowc's "invalid sequence" (-1)
  // and "incomplete sequence" (-2) results -- confirm against the wrapper.
  const bool failed = consumed == static_cast<size_t>(-1) ||
                      consumed == static_cast<size_t>(-2);
  if (failed)
    return false;

  // Only publish the result once the conversion is known to be good.
  dest = converted;
  return true;
}
// Converts the NUL-terminated, possibly multibyte string `ptr` into a single
// narrow character, interpreting the bytes according to `__loc`.
//
// Returns true and stores the result in `dest` when:
//   * `ptr` is exactly one byte long (that byte is used as-is), or
//   * the leading multibyte character can be narrowed with wctob_l, or
//   * the leading character is one of the specially handled code points
//     below, which are mapped to a plain space.
// Returns false and leaves `dest` untouched when `ptr` is empty, when the
// multibyte -> wide conversion fails, or when no single-byte equivalent is
// known. Callers are expected to fall back to a default in that case.
static bool checked_string_to_char_convert(char& dest,
                                           const char* ptr,
                                           __locale_struct* __loc) {
  if (*ptr == '\0')
    return false;
  // Fast path: a one-byte string needs no conversion at all.
  if (!ptr[1]) {
    dest = *ptr;
    return true;
  }
  // First convert the MBS into a wide char then attempt to narrow it using
  // wctob_l.
  wchar_t wout;
  if (!checked_string_to_wchar_convert(wout, ptr, __loc))
    return false;
  const int res = __libcpp_wctob_l(wout, __loc);
  if (res != char_traits<char>::eof()) {
    // `res` is a valid single-byte value here; make the narrowing explicit.
    dest = static_cast<char>(res);
    return true;
  }
  // FIXME: Work around specific multibyte sequences that we can reasonably
  // translate into a different single byte.
  switch (wout) {
  case L'\u202F': // narrow non-breaking space
  case L'\u00A0': // non-breaking space
    dest = ' ';
    return true;
  default:
    return false;
  }
  // Every switch branch above returns; this only documents that fact.
  _LIBCPP_UNREACHABLE();
}
// numpunct<char> && numpunct<wchar_t> // numpunct<char> && numpunct<wchar_t>
locale::id numpunct< char >::id; locale::id numpunct< char >::id;
@@ -4254,10 +4302,10 @@ numpunct_byname<char>::__init(const char* nm)
" failed to construct for " + string(nm)); " failed to construct for " + string(nm));
lconv* lc = __libcpp_localeconv_l(loc.get()); lconv* lc = __libcpp_localeconv_l(loc.get());
if (*lc->decimal_point) checked_string_to_char_convert(__decimal_point_, lc->decimal_point,
__decimal_point_ = *lc->decimal_point; loc.get());
if (*lc->thousands_sep) checked_string_to_char_convert(__thousands_sep_, lc->thousands_sep,
__thousands_sep_ = *lc->thousands_sep; loc.get());
__grouping_ = lc->grouping; __grouping_ = lc->grouping;
// localization for truename and falsename is not available // localization for truename and falsename is not available
} }
@@ -4288,16 +4336,16 @@ numpunct_byname<wchar_t>::__init(const char* nm)
{ {
__locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale); __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
if (loc == nullptr) if (loc == nullptr)
__throw_runtime_error("numpunct_byname<char>::numpunct_byname" __throw_runtime_error("numpunct_byname<wchar_t>::numpunct_byname"
" failed to construct for " + string(nm)); " failed to construct for " + string(nm));
lconv* lc = __libcpp_localeconv_l(loc.get()); lconv* lc = __libcpp_localeconv_l(loc.get());
if (*lc->decimal_point) checked_string_to_wchar_convert(__decimal_point_, lc->decimal_point,
__decimal_point_ = *lc->decimal_point; loc.get());
if (*lc->thousands_sep) checked_string_to_wchar_convert(__thousands_sep_, lc->thousands_sep,
__thousands_sep_ = *lc->thousands_sep; loc.get());
__grouping_ = lc->grouping; __grouping_ = lc->grouping;
// locallization for truename and falsename is not available // localization for truename and falsename is not available
} }
} }
@@ -5779,14 +5827,15 @@ moneypunct_byname<char, false>::init(const char* nm)
" failed to construct for " + string(nm)); " failed to construct for " + string(nm));
lconv* lc = __libcpp_localeconv_l(loc.get()); lconv* lc = __libcpp_localeconv_l(loc.get());
if (*lc->mon_decimal_point) if (!checked_string_to_char_convert(__decimal_point_,
__decimal_point_ = *lc->mon_decimal_point; lc->mon_decimal_point,
else loc.get()))
__decimal_point_ = base::do_decimal_point(); __decimal_point_ = base::do_decimal_point();
if (*lc->mon_thousands_sep) if (!checked_string_to_char_convert(__thousands_sep_,
__thousands_sep_ = *lc->mon_thousands_sep; lc->mon_thousands_sep,
else loc.get()))
__thousands_sep_ = base::do_thousands_sep(); __thousands_sep_ = base::do_thousands_sep();
__grouping_ = lc->mon_grouping; __grouping_ = lc->mon_grouping;
__curr_symbol_ = lc->currency_symbol; __curr_symbol_ = lc->currency_symbol;
if (lc->frac_digits != CHAR_MAX) if (lc->frac_digits != CHAR_MAX)
@@ -5822,14 +5871,14 @@ moneypunct_byname<char, true>::init(const char* nm)
" failed to construct for " + string(nm)); " failed to construct for " + string(nm));
lconv* lc = __libcpp_localeconv_l(loc.get()); lconv* lc = __libcpp_localeconv_l(loc.get());
if (*lc->mon_decimal_point) if (!checked_string_to_char_convert(__decimal_point_,
__decimal_point_ = *lc->mon_decimal_point; lc->mon_decimal_point,
else loc.get()))
__decimal_point_ = base::do_decimal_point(); __decimal_point_ = base::do_decimal_point();
if (*lc->mon_thousands_sep) if (!checked_string_to_char_convert(__thousands_sep_,
__thousands_sep_ = *lc->mon_thousands_sep; lc->mon_thousands_sep,
else loc.get()))
__thousands_sep_ = base::do_thousands_sep(); __thousands_sep_ = base::do_thousands_sep();
__grouping_ = lc->mon_grouping; __grouping_ = lc->mon_grouping;
__curr_symbol_ = lc->int_curr_symbol; __curr_symbol_ = lc->int_curr_symbol;
if (lc->int_frac_digits != CHAR_MAX) if (lc->int_frac_digits != CHAR_MAX)
@@ -5881,14 +5930,14 @@ moneypunct_byname<wchar_t, false>::init(const char* nm)
__throw_runtime_error("moneypunct_byname" __throw_runtime_error("moneypunct_byname"
" failed to construct for " + string(nm)); " failed to construct for " + string(nm));
lconv* lc = __libcpp_localeconv_l(loc.get()); lconv* lc = __libcpp_localeconv_l(loc.get());
if (*lc->mon_decimal_point) if (!checked_string_to_wchar_convert(__decimal_point_,
__decimal_point_ = static_cast<wchar_t>(*lc->mon_decimal_point); lc->mon_decimal_point,
else loc.get()))
__decimal_point_ = base::do_decimal_point(); __decimal_point_ = base::do_decimal_point();
if (*lc->mon_thousands_sep) if (!checked_string_to_wchar_convert(__thousands_sep_,
__thousands_sep_ = static_cast<wchar_t>(*lc->mon_thousands_sep); lc->mon_thousands_sep,
else loc.get()))
__thousands_sep_ = base::do_thousands_sep(); __thousands_sep_ = base::do_thousands_sep();
__grouping_ = lc->mon_grouping; __grouping_ = lc->mon_grouping;
wchar_t wbuf[100]; wchar_t wbuf[100];
mbstate_t mb = {0}; mbstate_t mb = {0};
@@ -5947,14 +5996,14 @@ moneypunct_byname<wchar_t, true>::init(const char* nm)
" failed to construct for " + string(nm)); " failed to construct for " + string(nm));
lconv* lc = __libcpp_localeconv_l(loc.get()); lconv* lc = __libcpp_localeconv_l(loc.get());
if (*lc->mon_decimal_point) if (!checked_string_to_wchar_convert(__decimal_point_,
__decimal_point_ = static_cast<wchar_t>(*lc->mon_decimal_point); lc->mon_decimal_point,
else loc.get()))
__decimal_point_ = base::do_decimal_point(); __decimal_point_ = base::do_decimal_point();
if (*lc->mon_thousands_sep) if (!checked_string_to_wchar_convert(__thousands_sep_,
__thousands_sep_ = static_cast<wchar_t>(*lc->mon_thousands_sep); lc->mon_thousands_sep,
else loc.get()))
__thousands_sep_ = base::do_thousands_sep(); __thousands_sep_ = base::do_thousands_sep();
__grouping_ = lc->mon_grouping; __grouping_ = lc->mon_grouping;
wchar_t wbuf[100]; wchar_t wbuf[100];
mbstate_t mb = {0}; mbstate_t mb = {0};

View File

@@ -12,9 +12,6 @@
// REQUIRES: locale.ru_RU.UTF-8 // REQUIRES: locale.ru_RU.UTF-8
// REQUIRES: locale.zh_CN.UTF-8 // REQUIRES: locale.zh_CN.UTF-8
// Russia uses ',' for the decimal separator. GLIBC returns '.'
// XFAIL: linux
// <locale> // <locale>
// class moneypunct_byname<charT, International> // class moneypunct_byname<charT, International>
@@ -25,6 +22,7 @@
#include <limits> #include <limits>
#include <cassert> #include <cassert>
#include "test_macros.h"
#include "platform_support.h" // locale name macros #include "platform_support.h" // locale name macros
class Fnf class Fnf
@@ -111,22 +109,29 @@ int main()
Fwt f(LOCALE_fr_FR_UTF_8, 1); Fwt f(LOCALE_fr_FR_UTF_8, 1);
assert(f.decimal_point() == L','); assert(f.decimal_point() == L',');
} }
// GLIBC 2.23 uses '.' as the decimal point while other C libraries use ','
#ifndef TEST_HAS_GLIBC
const char sep = ',';
const wchar_t wsep = L',';
#else
const char sep = '.';
const wchar_t wsep = L'.';
#endif
{ {
Fnf f(LOCALE_ru_RU_UTF_8, 1); Fnf f(LOCALE_ru_RU_UTF_8, 1);
assert(f.decimal_point() == ','); assert(f.decimal_point() == sep);
} }
{ {
Fnt f(LOCALE_ru_RU_UTF_8, 1); Fnt f(LOCALE_ru_RU_UTF_8, 1);
assert(f.decimal_point() == ','); assert(f.decimal_point() == sep);
} }
{ {
Fwf f(LOCALE_ru_RU_UTF_8, 1); Fwf f(LOCALE_ru_RU_UTF_8, 1);
assert(f.decimal_point() == L','); assert(f.decimal_point() == wsep);
} }
{ {
Fwt f(LOCALE_ru_RU_UTF_8, 1); Fwt f(LOCALE_ru_RU_UTF_8, 1);
assert(f.decimal_point() == L','); assert(f.decimal_point() == wsep);
} }
{ {

View File

@@ -18,16 +18,11 @@
// charT thousands_sep() const; // charT thousands_sep() const;
// Failure related to GLIBC's use of U00A0 as mon_thousands_sep
// and U002E as mon_decimal_point.
// TODO: U00A0 should be investigated.
// Possibly related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
// XFAIL: linux-gnu
#include <locale> #include <locale>
#include <limits> #include <limits>
#include <cassert> #include <cassert>
#include "test_macros.h"
#include "platform_support.h" // locale name macros #include "platform_support.h" // locale name macros
class Fnf class Fnf
@@ -114,22 +109,34 @@ int main()
Fwt f(LOCALE_fr_FR_UTF_8, 1); Fwt f(LOCALE_fr_FR_UTF_8, 1);
assert(f.thousands_sep() == L' '); assert(f.thousands_sep() == L' ');
} }
// The below tests work around GLIBC's use of U00A0 as mon_thousands_sep
// and U002E as mon_decimal_point.
// TODO: Fix thousands_sep for 'char'.
// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
#ifndef TEST_HAS_GLIBC
const char sep = ' ';
const wchar_t wsep = L' ';
#else
// FIXME libc++ specifically works around \u00A0 by translating it into
// a regular space.
const char sep = ' ';
const wchar_t wsep = L'\u00A0';
#endif
{ {
Fnf f(LOCALE_ru_RU_UTF_8, 1); Fnf f(LOCALE_ru_RU_UTF_8, 1);
assert(f.thousands_sep() == ' '); assert(f.thousands_sep() == sep);
} }
{ {
Fnt f(LOCALE_ru_RU_UTF_8, 1); Fnt f(LOCALE_ru_RU_UTF_8, 1);
assert(f.thousands_sep() == ' '); assert(f.thousands_sep() == sep);
} }
{ {
Fwf f(LOCALE_ru_RU_UTF_8, 1); Fwf f(LOCALE_ru_RU_UTF_8, 1);
assert(f.thousands_sep() == L' '); assert(f.thousands_sep() == wsep);
} }
{ {
Fwt f(LOCALE_ru_RU_UTF_8, 1); Fwt f(LOCALE_ru_RU_UTF_8, 1);
assert(f.thousands_sep() == L' '); assert(f.thousands_sep() == wsep);
} }
{ {

View File

@@ -16,12 +16,10 @@
// string grouping() const; // string grouping() const;
// TODO: investigation needed
// XFAIL: linux-gnu
#include <locale> #include <locale>
#include <cassert> #include <cassert>
#include "test_macros.h"
#include "platform_support.h" // locale name macros #include "platform_support.h" // locale name macros
int main() int main()
@@ -54,15 +52,20 @@ int main()
} }
{ {
std::locale l(LOCALE_fr_FR_UTF_8); std::locale l(LOCALE_fr_FR_UTF_8);
#if defined(TEST_HAS_GLIBC)
const char* const group = "\3";
#else
const char* const group = "\x7f";
#endif
{ {
typedef char C; typedef char C;
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l); const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
assert(np.grouping() == "\x7F"); assert(np.grouping() == group);
} }
{ {
typedef wchar_t C; typedef wchar_t C;
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l); const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
assert(np.grouping() == "\x7F"); assert(np.grouping() == group);
} }
} }
} }

View File

@@ -16,12 +16,11 @@
// char_type thousands_sep() const; // char_type thousands_sep() const;
// TODO: investigation needed
// XFAIL: linux-gnu
#include <locale> #include <locale>
#include <cassert> #include <cassert>
#include "test_macros.h"
#include "platform_support.h" // locale name macros #include "platform_support.h" // locale name macros
int main() int main()
@@ -54,15 +53,22 @@ int main()
} }
{ {
std::locale l(LOCALE_fr_FR_UTF_8); std::locale l(LOCALE_fr_FR_UTF_8);
#if defined(TEST_HAS_GLIBC)
const char sep = ' ';
const wchar_t wsep = L' ';
#else
const char sep = ',';
const wchar_t wsep = L',';
#endif
{ {
typedef char C; typedef char C;
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l); const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
assert(np.thousands_sep() == ','); assert(np.thousands_sep() == sep);
} }
{ {
typedef wchar_t C; typedef wchar_t C;
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l); const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
assert(np.thousands_sep() == L','); assert(np.thousands_sep() == wsep);
} }
} }
} }