Skip to content

Commit

Permalink
Merge branch 'natural-sort'
Browse files Browse the repository at this point in the history
Add natural sort functions.

See #1923
  • Loading branch information
vadz committed Jul 7, 2020
2 parents a2e4e6e + 83a2a1e commit 2289f8b
Show file tree
Hide file tree
Showing 4 changed files with 394 additions and 5 deletions.
18 changes: 18 additions & 0 deletions include/wx/arrstr.h
Expand Up @@ -42,12 +42,30 @@ wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2)
return cmp ? cmp : s1.Cmp(s2);
}


// Comparison function sorting strings in reverse dictionary order: this is
// just wxDictionaryStringSortAscending() applied to the swapped arguments.
inline int wxCMPFUNC_CONV
wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2)
{
    const int reversed = wxDictionaryStringSortAscending(s2, s1);
    return reversed;
}

// Compare two strings in natural sort order, i.e. comparing the numbers
// embedded in the strings numerically. The implementation uses the native
// comparison function under Microsoft Windows and wxCmpNaturalGeneric()
// elsewhere.
WXDLLIMPEXP_BASE
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2);

// wxWidgets' own implementation of the natural sort order comparison, used by
// wxCmpNatural() when no native function is available.
WXDLLIMPEXP_BASE
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2);

// Comparison function sorting strings in natural order: it simply forwards
// both arguments, unchanged, to wxCmpNatural().
inline int wxCMPFUNC_CONV
wxNaturalStringSortAscending(const wxString& s1, const wxString& s2)
{
    const int order = wxCmpNatural(s1, s2);
    return order;
}

// Comparison function sorting strings in reverse natural order: this is
// wxCmpNatural() called with the arguments swapped.
inline int wxCMPFUNC_CONV
wxNaturalStringSortDescending(const wxString& s1, const wxString& s2)
{
    const int reversed = wxCmpNatural(s2, s1);
    return reversed;
}


#if wxUSE_STD_CONTAINERS

// Type of a comparison function taking pointers to the two strings to compare.
typedef int (wxCMPFUNC_CONV *CMPFUNCwxString)(wxString*, wxString*);
Expand Down
97 changes: 92 additions & 5 deletions interface/wx/arrstr.h
Expand Up @@ -363,7 +363,8 @@ class wxSortedArrayString : public wxArray
This function can be used with wxSortedArrayString::Sort() or passed as an
argument to wxSortedArrayString constructor.
@see wxStringSortDescending(), wxDictionaryStringSortAscending()
@see wxStringSortDescending(), wxDictionaryStringSortAscending(),
wxNaturalStringSortAscending()
@since 3.1.0
*/
Expand All @@ -375,7 +376,8 @@ int wxStringSortAscending(const wxString& s1, const wxString& s2);
This function can be used with wxSortedArrayString::Sort() or passed as an
argument to wxSortedArrayString constructor.
@see wxStringSortAscending(), wxDictionaryStringSortAscending()
@see wxStringSortAscending(), wxDictionaryStringSortDescending(),
wxNaturalStringSortDescending()
@since 3.1.0
*/
Expand All @@ -392,7 +394,9 @@ int wxStringSortDescending(const wxString& s1, const wxString& s2);
This function can be used with wxSortedArrayString::Sort() or passed as an
argument to wxSortedArrayString constructor.
@see wxStringSortAscending(), wxDictionaryStringSortDescending()
@see wxDictionaryStringSortDescending(),
wxStringSortAscending(),
wxNaturalStringSortAscending()
@since 3.1.0
*/
Expand All @@ -403,11 +407,94 @@ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
See wxDictionaryStringSortAscending() for the dictionary sort description.
@see wxStringSortDescending()
@see wxDictionaryStringSortAscending(),
wxStringSortDescending(),
wxNaturalStringSortDescending()
@since 3.1.0
*/
int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
int wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2);


/**
Comparison function comparing strings in natural order.
This function can be used with wxSortedArrayString::Sort()
or passed as an argument to wxSortedArrayString constructor.
See wxCmpNatural() for more information about how natural
sort order is implemented.
@see wxNaturalStringSortDescending(),
wxStringSortAscending(), wxDictionaryStringSortAscending()
@since 3.1.4
*/
int wxNaturalStringSortAscending(const wxString& s1, const wxString& s2);

/**
Comparison function comparing strings in reverse natural order.
This function can be used with wxSortedArrayString::Sort()
or passed as an argument to wxSortedArrayString constructor.
See wxCmpNatural() for more information about how natural
sort order is implemented.
@see wxNaturalStringSortAscending(),
wxStringSortDescending(), wxDictionaryStringSortDescending()
@since 3.1.4
*/
int wxNaturalStringSortDescending(const wxString& s1, const wxString& s2);

/**
This function compares strings using case-insensitive collation;
additionally, numbers within strings are recognised and compared
numerically rather than alphabetically. When used for sorting,
the result is that e.g. file names containing numbers are sorted
in a natural way.
For example, sorting with a simple string comparison results in:
- file1.txt
- file10.txt
- file100.txt
- file2.txt
- file20.txt
- file3.txt
But sorting the same strings in natural sort order results in:
- file1.txt
- file2.txt
- file3.txt
- file10.txt
- file20.txt
- file100.txt
wxCmpNatural() uses an OS native natural sort function when available
(currently only under Microsoft Windows), wxCmpNaturalGeneric() otherwise.
Be aware that OS native implementations might differ from each other,
and might change behaviour from release to release.
@see wxNaturalStringSortAscending(), wxNaturalStringSortDescending()
@since 3.1.4
*/
int wxCmpNatural(const wxString& s1, const wxString& s2);

/**
This is wxWidgets' own implementation of the natural sort comparison function.
Requires wxRegEx: if it is unavailable, numbers within strings are not
recognised and only case-insensitive collation is performed.
@see wxCmpNatural()
@since 3.1.4
*/
int wxCmpNaturalGeneric(const wxString& s1, const wxString& s2);


// ============================================================================
// Global functions/macros
Expand Down
203 changes: 203 additions & 0 deletions src/common/arrstr.cpp
Expand Up @@ -20,13 +20,20 @@
#endif

#include "wx/arrstr.h"
#include "wx/regex.h"
#include "wx/scopedarray.h"
#include "wx/wxcrt.h"

#include "wx/beforestd.h"
#include <algorithm>
#include <functional>
#include "wx/afterstd.h"

#if defined( __WINDOWS__ )
#include <shlwapi.h>
#endif


// ============================================================================
// ArrayString
// ============================================================================
Expand Down Expand Up @@ -721,3 +728,199 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape

return ret;
}

#if wxUSE_REGEX

namespace // helpers needed by wxCmpNaturalGeneric()
{
// Used for comparison of string parts
struct wxStringFragment
{
    // Kind of the fragment. Fragments of different kinds are generally
    // ordered in the same way as the elements of this enum:
    //      Empty < SpaceOrPunct < Digit < LetterOrSymbol
    //
    // Two fragments of the same kind are compared as follows:
    //      SpaceOrPunct    collated as is
    //      Digit           numerically, using the value member
    //      LetterOrSymbol  collated after converting to lower case
    enum Type
    {
        Empty,
        SpaceOrPunct,   // whitespace or punctuation
        Digit,          // a sequence of decimal digits
        LetterOrSymbol  // letters and symbols, i.e. anything else
    };

    Type     type;
    wxString text;
    wxUint64 value;     // meaningful for Digit fragments only

    // Creates an Empty fragment with no text and zero value.
    wxStringFragment() : type(Empty), value(0) {}
};


// Extracts the leading fragment of the given string.
//
// The longest prefix of text consisting of characters of the same kind
// (letters or symbols, decimal digits, whitespace or punctuation) is removed
// from text and returned together with its type. For Digit fragments the
// numeric value is filled in too; such fragments are limited to 19 digits so
// that the value always fits into a wxUint64, meaning that longer sequences
// of digits are returned as several consecutive fragments.
//
// An Empty fragment is returned if, and only if, text is empty: at least one
// character is always consumed from a non-empty string, so that callers
// looping until the string is exhausted are guaranteed to terminate.
wxStringFragment GetFragment(wxString& text)
{
    static const wxRegEx reSpaceOrPunct(wxS("^([[:space:]]|[[:punct:]])+"));
    // Limit the length to make sure the value will fit into a wxUint64
    static const wxRegEx reDigit(wxS("^[[:digit:]]{1,19}"));
    // Note that '|' is an ordinary character inside a bracket expression, but
    // as it is a punctuation character anyhow, listing it is harmless.
    static const wxRegEx reLetterOrSymbol(wxS("^[^[:space:]|[:punct:]|[:digit:]]+"));

    if ( text.empty() )
        return wxStringFragment();

    wxStringFragment fragment;
    size_t length = 0;

    // In an attempt to minimize the number of wxRegEx::Matches() calls,
    // try to do them from the most expected to the least expected
    // string fragment type.
    if ( reLetterOrSymbol.Matches(text) )
    {
        if ( reLetterOrSymbol.GetMatch(NULL, &length) )
        {
            fragment.type = wxStringFragment::LetterOrSymbol;
            fragment.text = text.Left(length);
        }
    }
    else if ( reDigit.Matches(text) )
    {
        if ( reDigit.GetMatch(NULL, &length) )
        {
            fragment.type = wxStringFragment::Digit;
            fragment.text = text.Left(length);
            fragment.text.ToULongLong(&fragment.value);
        }
    }
    else if ( reSpaceOrPunct.Matches(text) )
    {
        if ( reSpaceOrPunct.GetMatch(NULL, &length) )
        {
            fragment.type = wxStringFragment::SpaceOrPunct;
            fragment.text = text.Left(length);
        }
    }

    if ( length == 0 )
    {
        // Nothing matched, which should not normally happen, but could if
        // the first character is not classified by the regex engine (e.g.
        // an embedded NUL) or an expression failed to compile. Returning
        // an Empty fragment without consuming anything here could make the
        // comparison loop in the caller spin forever, so treat the first
        // character as a symbol to guarantee progress.
        length = 1;
        fragment.type = wxStringFragment::LetterOrSymbol;
        fragment.text = text.Left(length);
        fragment.value = 0;
    }

    text.erase(0, length);
    return fragment;
}

// Compares two fragments for the natural sort order.
//
// Returns a negative value if lhs should come before rhs, a positive one if
// it should come after it and 0 if the two fragments are equivalent.
int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs)
{
    // Fragments of different types are ordered by their type only, in the
    // order of wxStringFragment::Type elements, i.e.
    //      Empty < SpaceOrPunct < Digit < LetterOrSymbol
    if ( lhs.type != rhs.type )
        return lhs.type < rhs.type ? -1 : 1;

    // From now on both fragments are of the same type.
    switch ( lhs.type )
    {
        case wxStringFragment::Empty:
            return 0;

        case wxStringFragment::SpaceOrPunct:
            return wxStrcoll_String(lhs.text, rhs.text);

        case wxStringFragment::Digit:
            if ( lhs.value == rhs.value )
                return 0;

            return lhs.value < rhs.value ? -1 : 1;

        case wxStringFragment::LetterOrSymbol:
            return wxStrcoll_String(lhs.text.Lower(), rhs.text.Lower());
    }

    // all possible cases should be covered by the switch above
    // but return also from here to prevent the compiler warning
    return 1;
}

} // unnamed namespace


// ----------------------------------------------------------------------------
// wxCmpNaturalGeneric
// ----------------------------------------------------------------------------
//
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
{
    // Work on copies of the strings as GetFragment() removes the fragment it
    // returns from the string passed to it.
    wxString restLeft(s1);
    wxString restRight(s2);

    // Compare the strings fragment by fragment until a difference is found
    // or both of them are exhausted.
    while ( !restLeft.empty() || !restRight.empty() )
    {
        const wxStringFragment partLeft = GetFragment(restLeft);
        const wxStringFragment partRight = GetFragment(restRight);

        const int result = CompareFragmentNatural(partLeft, partRight);
        if ( result != 0 )
            return result;
    }

    // No differing fragments found.
    return 0;
}

#else

// Fallback used when wxRegEx is not available: numbers are not recognised
// and the lower-cased strings are simply collated.
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
{
    const wxString lower1 = s1.Lower();
    const wxString lower2 = s2.Lower();

    return wxStrcoll_String(lower1, lower2);
}

#endif // #if wxUSE_REGEX

// ----------------------------------------------------------------------------
// Declaration of StrCmpLogicalW()
// ----------------------------------------------------------------------------
//
// In some distributions of MinGW32, this function is exported in the library,
// but not declared in shlwapi.h. Therefore we declare it here, as it is
// called by wxCmpNatural() below when building for Windows.
#if defined( __MINGW32_TOOLCHAIN__ )
extern "C" __declspec(dllimport) int WINAPI StrCmpLogicalW(LPCWSTR psz1, LPCWSTR psz2);
#endif


// ----------------------------------------------------------------------------
// wxCmpNatural
// ----------------------------------------------------------------------------
//
// If a native version of natural sort is available (currently this is only
// the case under Windows), then use that, otherwise use the generic version.
//
// Note that this function must not be defined inline: it is declared as an
// ordinary exported function in wx/arrstr.h and called from the inline
// comparison functions defined there, so its definition must always be
// emitted in this translation unit. Declaring it inline only here would be
// ill-formed and could result in undefined references when linking.
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2)
{
#if defined( __WINDOWS__ )
    return StrCmpLogicalW(s1.wc_str(), s2.wc_str());
#else
    return wxCmpNaturalGeneric(s1, s2);
#endif // #if defined( __WINDOWS__ )
}

0 comments on commit 2289f8b

Please sign in to comment.