From c1883c874c922783d65029133d74098b805c73e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrei=20Tudor=20TOPAL=C4=82=20=28101668=29?= Date: Sun, 18 Jun 2023 16:12:12 +0300 Subject: [PATCH] ICU-22419 Performance improvements of collated string comparison --- icu4c/source/i18n/rulebasedcollator.cpp | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/icu4c/source/i18n/rulebasedcollator.cpp b/icu4c/source/i18n/rulebasedcollator.cpp index e9482628d9b6..41bd3a74e23a 100644 --- a/icu4c/source/i18n/rulebasedcollator.cpp +++ b/icu4c/source/i18n/rulebasedcollator.cpp @@ -987,6 +987,33 @@ RuleBasedCollator::doCompare(const char16_t *left, int32_t leftLength, ++equalPrefixLength; } } else { + if ((uint32_t)leftLength >= sizeof(uint64_t) + && (uint32_t)rightLength >= sizeof(uint64_t) + && (uintptr_t)left % sizeof(uint64_t) == (uintptr_t)right % sizeof(uint64_t)) { + int32_t i = 0; + int32_t limit = leftLength < rightLength ? leftLength : rightLength; + + // Memory alignment step. + while(i < limit && left[i] == right[i] + && (uintptr_t)(left + i) % sizeof(uint64_t) != 0) { + ++i; + } + equalPrefixLength += i; + + // Double word based comparison. + if (i < limit && (uintptr_t)(left + i) % sizeof(uint64_t) == 0) { + const uint64_t *dWordLeft = (const uint64_t *)left; + const uint64_t *dWordRight = (const uint64_t *)right; + int32_t dWordLength = (leftLength < rightLength ? leftLength : rightLength) / sizeof(uint64_t); + i = 0; + + while(i < dWordLength && dWordLeft[i] == dWordRight[i]) { + ++i; + } + equalPrefixLength = i != 0 ? 
i * sizeof(uint64_t) / 2 - 1 : 0; + } + } + leftLimit = left + leftLength; rightLimit = right + rightLength; for(;;) { @@ -1102,6 +1129,33 @@ RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength, ++equalPrefixLength; } } else { + if ((uint32_t)leftLength >= sizeof(uint64_t) + && (uint32_t)rightLength >= sizeof(uint64_t) + && (uintptr_t)left % sizeof(uint64_t) == (uintptr_t)right % sizeof(uint64_t)) { + int32_t i = 0; + int32_t limit = leftLength < rightLength ? leftLength : rightLength; + + // Memory alignment step. + while(i < limit && left[i] == right[i] + && (uintptr_t)(left + i) % sizeof(uint64_t) != 0) { + ++i; + } + equalPrefixLength += i; + + // Double word based comparison. + if (i < limit && (uintptr_t)(left + i) % sizeof(uint64_t) == 0) { + const uint64_t *dWordLeft = (const uint64_t *)left; + const uint64_t *dWordRight = (const uint64_t *)right; + int32_t dWordLength = (leftLength < rightLength ? leftLength : rightLength) / sizeof(uint64_t); + i = 0; + + while(i < dWordLength && dWordLeft[i] == dWordRight[i]) { + ++i; + } + equalPrefixLength = i != 0 ? i * sizeof(uint64_t) - 1 : 0; + } + } + for(;;) { if(equalPrefixLength == leftLength) { if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }