Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use locale grouping to format numbers #1781

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/wx/intl.h
Expand Up @@ -118,6 +118,10 @@ enum wxLocaleInfo
// the character used as decimal point (for wxLOCALE_CAT_NUMBER or MONEY)
wxLOCALE_DECIMAL_POINT,

// Specifies the number of digits in each of the groups separated by the
// thousands separator (for wxLOCALE_CAT_NUMBER or MONEY)
wxLOCALE_GROUPING,

// the strftime()-formats used for short/long date and time representations
// (under some platforms short and long date formats are the same)
//
Expand Down Expand Up @@ -356,6 +360,10 @@ class WXDLLIMPEXP_BASE wxLocale
const wxString& shortName,
bool bLoadDefault);

// Converts a grouping format string returned by localeconv()
// to the kind used on Windows platform
static wxString StandardizeGroupingString(const wxString& g);


wxString m_strLocale, // this locale name
m_strShort; // short name for the locale
Expand Down
1 change: 1 addition & 0 deletions include/wx/language.h
Expand Up @@ -108,6 +108,7 @@ enum wxLanguage
wxLANGUAGE_ENGLISH_SOUTH_AFRICA,
wxLANGUAGE_ENGLISH_TRINIDAD,
wxLANGUAGE_ENGLISH_ZIMBABWE,
wxLANGUAGE_ENGLISH_INDIA,
wxLANGUAGE_ESPERANTO,
wxLANGUAGE_ESTONIAN,
wxLANGUAGE_FAEROESE,
Expand Down
8 changes: 8 additions & 0 deletions include/wx/numformatter.h
Expand Up @@ -58,6 +58,14 @@ class WXDLLIMPEXP_BASE wxNumberFormatter
// function returns true.
static bool GetThousandsSeparatorIfUsed(wxChar *sep);

// Same as the above method but provides the grouping format as well
static bool GetThousandsSeparatorAndGroupingIfUsed(wxChar *sep, wxString *gr);
nkottary marked this conversation as resolved.
Show resolved Hide resolved

// Format a number s with the specified thousands separator, decimal separator
// and grouping format for the thousands separator
static void FormatNumber(wxString &s, wxChar thousandsSep,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I don't remember any more, but was there a reason to pass s as output parameter here instead of just returning it? Is it done for efficiency or is there something else?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just followed the declaration of AddThousandsSeparators(wxString& s).

wxChar decSep, wxString grouping);

private:
// Post-process the string representing an integer.
static wxString PostProcessIntString(wxString s, int style);
Expand Down
13 changes: 12 additions & 1 deletion interface/wx/intl.h
Expand Up @@ -127,6 +127,18 @@ enum wxLocaleInfo
*/
wxLOCALE_DECIMAL_POINT,

/**
Specifies the number of digits that form each of the groups
separated by the thousands separator, as a string of group
sizes delimited by ';' (for example "3;0").

This value can be used with either wxLOCALE_CAT_NUMBER or
wxLOCALE_CAT_MONEY categories.

@since 3.1.5
*/
nkottary marked this conversation as resolved.
Show resolved Hide resolved
wxLOCALE_GROUPING,

/**
Short date format.

Expand Down Expand Up @@ -526,4 +538,3 @@ class wxLocale
Get the current locale object (note that it may be NULL!)
*/
wxLocale* wxGetLocale();

1 change: 1 addition & 0 deletions interface/wx/language.h
Expand Up @@ -93,6 +93,7 @@ enum wxLanguage
wxLANGUAGE_ENGLISH_SOUTH_AFRICA,
wxLANGUAGE_ENGLISH_TRINIDAD,
wxLANGUAGE_ENGLISH_ZIMBABWE,
wxLANGUAGE_ENGLISH_INDIA,
wxLANGUAGE_ESPERANTO,
wxLANGUAGE_ESTONIAN,
wxLANGUAGE_FAEROESE,
Expand Down
49 changes: 49 additions & 0 deletions interface/wx/numformatter.h
Expand Up @@ -123,4 +123,53 @@ class wxNumberFormatter
*/
static bool GetThousandsSeparatorIfUsed(wxChar *sep);

/**
Get the thousands separator and grouping format if grouping of the
digits is used by the current locale.

The values returned in @a sep and @a gr should only be used if the
function returns @true, otherwise no thousands separator should be
used at all.

@param sep
Points to the variable receiving the thousands separator character
if it is used by the current locale. May be @NULL if only the
function return value is needed.

@param gr
Points to the variable receiving the grouping format string
if it is used by the current locale. May be @NULL if only the
function return value is needed.

@since 3.1.5
*/
nkottary marked this conversation as resolved.
Show resolved Hide resolved
static bool GetThousandsSeparatorAndGroupingIfUsed(wxChar *sep, wxString *gr);

/**
Format a number with the thousands separator based on grouping format.
The grouping format is a string of digits separated by ';'. Each digit
indicates the number of digits of the string s to be grouped starting
from the right. If the last digit is '0' then the last but one digit
is used for grouping the remaining digits.

Examples:
Number: "123456789", Grouping format: "3;0", result: "123,456,789"
Number: "123456789", Grouping format: "3;2;0", result: "12,34,56,789"

@param s
The number to be formatted as a string

@param thousandsSep
The thousands separator

@param decSep
The decimal separator

@param grouping
The string representing the thousands separator grouping format.

@since 3.1.5
*/
static void FormatNumber(wxString &s, wxChar thousandsSep, wxChar decSep,
wxString grouping);
};
1 change: 1 addition & 0 deletions misc/languages/langtabl.txt
Expand Up @@ -63,6 +63,7 @@ wxLANGUAGE_ENGLISH_CANADA en_CA LANG_ENGLISH SUBLANG_ENGLISH_C
wxLANGUAGE_ENGLISH_CARIBBEAN en_CB LANG_ENGLISH SUBLANG_ENGLISH_CARIBBEAN LTR "English (Caribbean)"
wxLANGUAGE_ENGLISH_DENMARK en_DK - - LTR "English (Denmark)"
wxLANGUAGE_ENGLISH_EIRE en_IE LANG_ENGLISH SUBLANG_ENGLISH_EIRE LTR "English (Eire)"
wxLANGUAGE_ENGLISH_INDIA en_IN LANG_ENGLISH SUBLANG_ENGLISH_INDIA LTR "English (India)"
wxLANGUAGE_ENGLISH_JAMAICA en_JM LANG_ENGLISH SUBLANG_ENGLISH_JAMAICA LTR "English (Jamaica)"
wxLANGUAGE_ENGLISH_NEW_ZEALAND en_NZ LANG_ENGLISH SUBLANG_ENGLISH_NZ LTR "English (New Zealand)"
wxLANGUAGE_ENGLISH_PHILIPPINES en_PH LANG_ENGLISH SUBLANG_ENGLISH_PHILIPPINES LTR "English (Philippines)"
Expand Down
79 changes: 79 additions & 0 deletions src/common/intl.cpp
Expand Up @@ -69,6 +69,7 @@
#include "wx/osx/core/cfstring.h"
#include <CoreFoundation/CFLocale.h>
#include <CoreFoundation/CFDateFormatter.h>
#include <CoreFoundation/CFNumberFormatter.h>
#include <CoreFoundation/CFString.h>
#endif

Expand Down Expand Up @@ -1639,6 +1640,10 @@ GetInfoFromLCID(LCID lcid,
}
break;

case wxLOCALE_GROUPING:
if ( ::GetLocaleInfo(lcid, LOCALE_SGROUPING, buf, WXSIZEOF(buf)) )
str = buf;
break;
case wxLOCALE_SHORT_DATE_FMT:
case wxLOCALE_LONG_DATE_FMT:
case wxLOCALE_TIME_FMT:
Expand Down Expand Up @@ -1712,6 +1717,9 @@ wxString wxLocale::GetInfo(wxLocaleInfo index, wxLocaleCategory cat)
case wxLOCALE_DECIMAL_POINT:
return ".";

case wxLOCALE_GROUPING:
return "3;0";

case wxLOCALE_SHORT_DATE_FMT:
return "%m/%d/%y";

Expand Down Expand Up @@ -1770,6 +1778,42 @@ wxString wxLocale::GetInfo(wxLocaleInfo index, wxLocaleCategory WXUNUSED(cat))
cfstr = (CFStringRef) CFLocaleGetValue(userLocaleRef, kCFLocaleDecimalSeparator);
break;

case wxLOCALE_GROUPING:
{
wxCFRef<CFNumberFormatterRef> numFormatterRef(
CFNumberFormatterCreate(NULL, userLocaleRef, kCFNumberFormatterDecimalStyle));
CFNumberRef size = (CFNumberRef) CFNumberFormatterCopyProperty(
numFormatterRef, kCFNumberFormatterGroupingSize);
CFNumberRef secSize = (CFNumberRef) CFNumberFormatterCopyProperty(
numFormatterRef, kCFNumberFormatterSecondaryGroupingSize);
// Convert the size and secondary size to char and create the grouping string
char s, ss;
if (CFNumberGetValue(size, kCFNumberCharType, &s))
{
if (CFNumberGetValue(secSize, kCFNumberCharType, &ss) && ss != s)
{
s += '0';
ss += '0';
const char gstr[] = {s, ';', ss, ';', '0', '\0'};
cfstr = CFStringCreateWithCString(
NULL, &gstr[0], kCFStringEncodingASCII);
}
else
{
s += '0';
const char gstr[] = {s, ';', '0', '\0'};
cfstr = CFStringCreateWithCString(
NULL, &gstr[0], kCFStringEncodingASCII);
}
}
else
{
// No grouping
cfstr = CFStringCreateWithCString(NULL, "", kCFStringEncodingASCII);
}
}
break;

case wxLOCALE_SHORT_DATE_FMT:
case wxLOCALE_LONG_DATE_FMT:
case wxLOCALE_DATE_TIME_FMT:
Expand Down Expand Up @@ -1900,6 +1944,32 @@ wxString GetDateFormatFromLangInfo(wxLocaleInfo index)

} // anonymous namespace

// Convert a grouping format string returned by localeconv() to
// a standardized format. Our standard format is the one used for
// Windows SGROUPING. Here we convert char sized integers to ASCII
// characters. That is, for example '\3' becomes '3'. We also add
// a ';' delimiter between each number. A '\0' or a CHAR_MAX
// signifies the end of the argument string. If the argument string
// ends with a '\0' then we insert a '0' into the return string.
// The return string will be NULL terminated.

/* static */
wxString wxLocale::StandardizeGroupingString(const wxString& g)
{
wxString s;
int i;
for (i = 0; g[i] != '\0' && g[i] != CHAR_MAX; i++)
{
s.Append((char)((int)g[i] + (int)'0'));
s.Append(';');
}
if (g[i] == '\0')
s.Append('0');
else
s.RemoveLast(); // Remove extra ;
return s;
}

/* static */
wxString wxLocale::GetInfo(wxLocaleInfo index, wxLocaleCategory cat)
{
Expand Down Expand Up @@ -1930,6 +2000,15 @@ wxString wxLocale::GetInfo(wxLocaleInfo index, wxLocaleCategory cat)
wxFAIL_MSG( "invalid wxLocaleCategory" );
break;

case wxLOCALE_GROUPING:
if ( cat == wxLOCALE_CAT_NUMBER )
return StandardizeGroupingString(lc->grouping);
else if ( cat == wxLOCALE_CAT_MONEY )
return StandardizeGroupingString(lc->mon_grouping);

wxFAIL_MSG( "invalid wxLocaleCategory" );
break;

case wxLOCALE_SHORT_DATE_FMT:
case wxLOCALE_LONG_DATE_FMT:
case wxLOCALE_DATE_TIME_FMT:
Expand Down
4 changes: 4 additions & 0 deletions src/common/languageinfo.cpp
Expand Up @@ -388,6 +388,9 @@
#ifndef SUBLANG_ENGLISH_ZIMBABWE
#define SUBLANG_ENGLISH_ZIMBABWE SUBLANG_DEFAULT
#endif
#ifndef SUBLANG_ENGLISH_INDIA
#define SUBLANG_ENGLISH_INDIA SUBLANG_DEFAULT
#endif
#ifndef SUBLANG_FRENCH
#define SUBLANG_FRENCH SUBLANG_DEFAULT
#endif
Expand Down Expand Up @@ -628,6 +631,7 @@ void wxLocale::InitLanguagesDB()
LNG(wxLANGUAGE_ENGLISH_SOUTH_AFRICA, "en_ZA", LANG_ENGLISH , SUBLANG_ENGLISH_SOUTH_AFRICA , wxLayout_LeftToRight, "English (South Africa)")
LNG(wxLANGUAGE_ENGLISH_TRINIDAD, "en_TT", LANG_ENGLISH , SUBLANG_ENGLISH_TRINIDAD , wxLayout_LeftToRight, "English (Trinidad)")
LNG(wxLANGUAGE_ENGLISH_ZIMBABWE, "en_ZW", LANG_ENGLISH , SUBLANG_ENGLISH_ZIMBABWE , wxLayout_LeftToRight, "English (Zimbabwe)")
LNG(wxLANGUAGE_ENGLISH_INDIA, "en_IN", LANG_ENGLISH , SUBLANG_ENGLISH_INDIA , wxLayout_LeftToRight, "English (India)")
LNG(wxLANGUAGE_ESPERANTO, "eo" , 0 , 0 , wxLayout_LeftToRight, "Esperanto")
LNG(wxLANGUAGE_ESTONIAN, "et_EE", LANG_ESTONIAN , SUBLANG_DEFAULT , wxLayout_LeftToRight, "Estonian")
LNG(wxLANGUAGE_FAEROESE, "fo_FO", LANG_FAEROESE , SUBLANG_DEFAULT , wxLayout_LeftToRight, "Faeroese")
Expand Down
84 changes: 73 additions & 11 deletions src/common/numformatter.cpp
Expand Up @@ -171,6 +171,44 @@ bool wxNumberFormatter::GetThousandsSeparatorIfUsed(wxChar *sep)
#endif // wxUSE_INTL/!wxUSE_INTL
}

bool wxNumberFormatter::GetThousandsSeparatorAndGroupingIfUsed(wxChar *sep, wxString *gr)
{
#if wxUSE_INTL
static wxChar s_thousandsSeparator = 0;
static wxString s_grouping;
static LocaleId s_localeUsedForInit;

if ( s_localeUsedForInit.NotInitializedOrHasChanged() )
{
const wxString
s = wxLocale::GetInfo(wxLOCALE_THOUSANDS_SEP, wxLOCALE_CAT_NUMBER);
if ( s.length() == 1 )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self: need to check whether this works correctly with non-ASCII separators. as in fr_FR.utf8 locale, the thousands separator is 'NARROW NO-BREAK SPACE' (U+202F).

{
s_thousandsSeparator = s[0];
s_grouping = wxLocale::GetInfo(wxLOCALE_GROUPING, wxLOCALE_CAT_NUMBER);
}
//else: Unlike above it's perfectly fine for the thousands separator to
// be empty if grouping is not used, so just leave it as 0.
}

if ( !s_thousandsSeparator )
return false;

if ( sep )
{
*sep = s_thousandsSeparator;
if ( gr )
*gr = s_grouping;
}

return true;
#else // !wxUSE_INTL
wxUnusedVar(sep);
wxUnusedVar(gr);
return false;
#endif // wxUSE_INTL/!wxUSE_INTL
}

// ----------------------------------------------------------------------------
// Conversion to string and helpers
// ----------------------------------------------------------------------------
Expand Down Expand Up @@ -221,10 +259,20 @@ void wxNumberFormatter::AddThousandsSeparators(wxString& s)
return;

wxChar thousandsSep;
if ( !GetThousandsSeparatorIfUsed(&thousandsSep) )
wxChar decSep;
wxString grouping;
if ( !GetThousandsSeparatorAndGroupingIfUsed(&thousandsSep, &grouping) )
return;

size_t pos = s.find(GetDecimalSeparator());
decSep = GetDecimalSeparator();
wxNumberFormatter::FormatNumber(
s, thousandsSep, decSep, grouping);
}

void wxNumberFormatter::FormatNumber(
wxString& s, wxChar thousandsSep, wxChar decSep, wxString grouping)
{
size_t pos = s.find(decSep);
if ( pos == wxString::npos )
{
// Start grouping at the end of an integer number.
Expand All @@ -235,17 +283,31 @@ void wxNumberFormatter::AddThousandsSeparators(wxString& s)
// before their start.
const size_t start = s.find_first_of("0123456789");

// We currently group digits by 3 independently of the locale. This is not
// the right thing to do and we should use lconv::grouping (under POSIX)
// and GetLocaleInfo(LOCALE_SGROUPING) (under MSW) to get information about
// the correct grouping to use. This is something that needs to be done at
// wxLocale level first and then used here in the future (TODO).
const size_t GROUP_LEN = 3;
// We get the grouping style from locale. This is represented by a ';'
// delimited character array where each element is the number of digits
// in a group starting from the right of the number. If the last element
// in the grouping is a 0 then the last but one element is the number
// used for grouping the remaining digits.

while ( pos > start + GROUP_LEN )
size_t i = 0;
while((grouping[i] != '\0') && (grouping[i] != '0'))
{
if (grouping[i] != ';')
{
if (pos <= start + (size_t)(grouping[i] - '0'))
break;
pos -= (size_t)(grouping[i] - '0');
s.insert(pos, thousandsSep);
}
i++;
}
if ( grouping[i] == '0' && i > 0 )
{
pos -= GROUP_LEN;
s.insert(pos, thousandsSep);
while ( pos > start + (size_t)(grouping[i - 2] - '0'))
{
pos -= (size_t)(grouping[i - 2] - '0');
s.insert(pos, thousandsSep);
}
}
}

Expand Down