From 5b3b56c918609c6134c81b291523f21e00487f7b Mon Sep 17 00:00:00 2001 From: Chocobo1 Date: Mon, 8 Jan 2024 14:47:00 +0800 Subject: [PATCH] Improve natural sort algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Use proper case folding function instead of `toLower()`. 2. Use locale aware comparison instead of comparing unicode code points. Now `a` comes before `A` which is the same as the result from QCollator. A nice side effect is now it properly compares locale specific characters (for example `C`, `Č`). 3. Improve testing. Now the test is runnable and stable on all platforms. PR #20208. --- src/base/utils/compare.cpp | 8 +++----- src/base/utils/compare.h | 29 ++++++++++++++-------------- test/testutilscompare.cpp | 39 ++++++++++++++++++++++++-------------- 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/base/utils/compare.cpp b/src/base/utils/compare.cpp index 536dd147c..aca03e1a6 100644 --- a/src/base/utils/compare.cpp +++ b/src/base/utils/compare.cpp @@ -31,7 +31,6 @@ #include #include -#ifndef QBT_USE_QCOLLATOR int Utils::Compare::naturalCompare(const QString &left, const QString &right, const Qt::CaseSensitivity caseSensitivity) { // Return value <0: `left` is smaller than `right` @@ -45,8 +44,8 @@ int Utils::Compare::naturalCompare(const QString &left, const QString &right, co if ((posL == left.size()) || (posR == right.size())) return (left.size() - right.size()); // when a shorter string is another string's prefix, shorter string place before longer string - const QChar leftChar = (caseSensitivity == Qt::CaseSensitive) ? left[posL] : left[posL].toLower(); - const QChar rightChar = (caseSensitivity == Qt::CaseSensitive) ? right[posR] : right[posR].toLower(); + const QChar leftChar = (caseSensitivity == Qt::CaseSensitive) ? left[posL] : left[posL].toCaseFolded(); + const QChar rightChar = (caseSensitivity == Qt::CaseSensitive) ? right[posR] : right[posR].toCaseFolded(); // Compare only non-digits. // Numbers should be compared as a whole // otherwise the string->int conversion can yield a wrong value @@ -89,8 +88,7 @@ int Utils::Compare::naturalCompare(const QString &left, const QString &right, co } else { - return (leftChar.unicode() - rightChar.unicode()); + return QString::localeAwareCompare(leftChar, rightChar); } } } -#endif diff --git a/src/base/utils/compare.h b/src/base/utils/compare.h index f0428a5f7..bf222fc1e 100644 --- a/src/base/utils/compare.h +++ b/src/base/utils/compare.h @@ -34,24 +34,35 @@ // for QT_FEATURE_xxx, see: https://wiki.qt.io/Qt5_Build_System#How_to #include +#ifndef QBT_USE_QCOLLATOR // macOS and Windows support 'case sensitivity' and 'numeric mode' natively // https://github.com/qt/qtbase/blob/6.0/src/corelib/CMakeLists.txt#L777-L793 // https://github.com/qt/qtbase/blob/6.0/src/corelib/text/qcollator_macx.cpp#L74-L77 // https://github.com/qt/qtbase/blob/6.0/src/corelib/text/qcollator_win.cpp#L72-L78 #if ((QT_FEATURE_icu == 1) || defined(Q_OS_MACOS) || defined(Q_OS_WIN)) -#define QBT_USE_QCOLLATOR +#define QBT_USE_QCOLLATOR 1 #include +#else +#define QBT_USE_QCOLLATOR 0 +#endif #endif class QString; namespace Utils::Compare { -#ifdef QBT_USE_QCOLLATOR + int naturalCompare(const QString &left, const QString &right, Qt::CaseSensitivity caseSensitivity); + template class NaturalCompare { public: +#if (QBT_USE_QCOLLATOR == 0) + int operator()(const QString &left, const QString &right) const + { + return naturalCompare(left, right, caseSensitivity); + } +#else NaturalCompare() { m_collator.setNumericMode(true); @@ -65,20 +76,8 @@ namespace Utils::Compare private: QCollator m_collator; - }; -#else - int naturalCompare(const QString &left, const QString &right, Qt::CaseSensitivity caseSensitivity); - - template - class NaturalCompare - { - public: - int operator()(const QString &left, const QString &right) const - { - return naturalCompare(left, right, caseSensitivity); - } - }; #endif + }; template class NaturalLessThan diff --git a/test/testutilscompare.cpp b/test/testutilscompare.cpp index 980f1895c..65edde992 100644 --- a/test/testutilscompare.cpp +++ b/test/testutilscompare.cpp @@ -26,15 +26,16 @@ * exception statement from your version. */ -#include - +#include #include #include #include "base/global.h" + +// only test qbt own implementation, not QCollator +#define QBT_USE_QCOLLATOR 0 #include "base/utils/compare.h" -#ifndef QBT_USE_QCOLLATOR // only test qbt own implementation, not QCollator namespace { enum class CompareResult @@ -59,8 +60,8 @@ namespace {u"a"_s, u""_s, CompareResult::Greater, CompareResult::Greater}, {u"a"_s, u"a"_s, CompareResult::Equal, CompareResult::Equal}, - {u"A"_s, u"a"_s, CompareResult::Equal, CompareResult::Less}, // ascii code of 'A' is smaller than 'a' - {u"a"_s, u"A"_s, CompareResult::Equal, CompareResult::Greater}, + {u"A"_s, u"a"_s, CompareResult::Equal, CompareResult::Greater}, + {u"a"_s, u"A"_s, CompareResult::Equal, CompareResult::Less}, {u"0"_s, u"0"_s, CompareResult::Equal, CompareResult::Equal}, {u"1"_s, u"0"_s, CompareResult::Greater, CompareResult::Greater}, @@ -71,17 +72,17 @@ namespace {u"😁"_s, u"😀"_s, CompareResult::Greater, CompareResult::Greater}, {u"a1"_s, u"a1"_s, CompareResult::Equal, CompareResult::Equal}, - {u"A1"_s, u"a1"_s, CompareResult::Equal, CompareResult::Less}, - {u"a1"_s, u"A1"_s, CompareResult::Equal, CompareResult::Greater}, + {u"A1"_s, u"a1"_s, CompareResult::Equal, CompareResult::Greater}, + {u"a1"_s, u"A1"_s, CompareResult::Equal, CompareResult::Less}, {u"a1"_s, u"a2"_s, CompareResult::Less, CompareResult::Less}, - {u"A1"_s, u"a2"_s, CompareResult::Less, CompareResult::Less}, - {u"a1"_s, u"A2"_s, CompareResult::Less, CompareResult::Greater}, + {u"A1"_s, u"a2"_s, CompareResult::Less, CompareResult::Greater}, + {u"a1"_s, u"A2"_s, CompareResult::Less, CompareResult::Less}, {u"A1"_s, u"A2"_s, CompareResult::Less, CompareResult::Less}, {u"abc100"_s, u"abc99"_s, CompareResult::Greater, CompareResult::Greater}, - {u"ABC100"_s, u"abc99"_s, CompareResult::Greater, CompareResult::Less}, - {u"abc100"_s, u"ABC99"_s, CompareResult::Greater, CompareResult::Greater}, + {u"ABC100"_s, u"abc99"_s, CompareResult::Greater, CompareResult::Greater}, + {u"abc100"_s, u"ABC99"_s, CompareResult::Greater, CompareResult::Less}, {u"ABC100"_s, u"ABC99"_s, CompareResult::Greater, CompareResult::Greater}, {u"100abc"_s, u"99abc"_s, CompareResult::Greater, CompareResult::Greater}, @@ -135,7 +136,6 @@ namespace } } } -#endif class TestUtilsCompare final : public QObject { @@ -145,8 +145,20 @@ class TestUtilsCompare final : public QObject public: TestUtilsCompare() = default; -#ifndef QBT_USE_QCOLLATOR // only test qbt own implementation, not QCollator private slots: + void initTestCase() const + { + // Test will fail if ran with `C` locale. This is because `C` locale compare chars by code points + // and doesn't take account of human expectations + QLocale::setDefault(QLocale::English); + } + + void cleanupTestCase() const + { + // restore global state + QLocale::setDefault(QLocale::system()); + } + void testNaturalCompareCaseInsensitive() const { const Utils::Compare::NaturalCompare cmp; @@ -178,7 +190,6 @@ private slots: for (const TestData &data : testData) testLessThan(data, cmp(data.lhs, data.rhs), data.caseSensitiveResult); } -#endif }; QTEST_APPLESS_MAIN(TestUtilsCompare)