v1-0001-Use-optimized-versions-of-ICU-case-conversion-for.patch
text/x-patch
Filename: v1-0001-Use-optimized-versions-of-ICU-case-conversion-for.patch
Type: text/x-patch
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v1-0001
Subject: Use optimized versions of ICU case conversion for UTF-8
| File | + | − |
|---|---|---|
| src/backend/utils/adt/pg_locale_icu.c | 114 | 47 |
From 5a355ef083cc7de92ae1e5dcc0198866a07919eb Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andreas@proxel.se>
Date: Tue, 17 Dec 2024 22:47:00 +0100
Subject: [PATCH v1 1/2] Use optimized versions of ICU case conversion for
UTF-8
Instead of converting to and from UChar when doing case conversions, we
use the UTF-8 versions of the functions. This can give a significant
speedup, 15-20%, on short to medium length strings.
---
src/backend/utils/adt/pg_locale_icu.c | 161 ++++++++++++++++++--------
1 file changed, 114 insertions(+), 47 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index f0a77a767e7..eea6f48f6c3 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -12,6 +12,7 @@
#include "postgres.h"
#ifdef USE_ICU
+#include "unicode/ucasemap.h"
#include <unicode/ucnv.h>
#include <unicode/ustring.h>
@@ -100,9 +101,9 @@ static size_t icu_from_uchar(char *dest, size_t destsize,
const UChar *buff_uchar, int32_t len_uchar);
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
UErrorCode *status);
-static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
- UChar **buff_dest, UChar *buff_source,
- int32_t len_source);
+static int32_t icu_convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
+ UChar **buff_dest, UChar *buff_source,
+ int32_t len_source);
static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
@@ -350,60 +351,126 @@ size_t
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
- size_t result_len;
-
- len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
- len_conv = icu_convert_case(u_strToLower, locale,
- &buff_conv, buff_uchar, len_uchar);
- result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
-
- return result_len;
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ UCaseMap *casemap;
+ int32_t needed;
+
+ casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("casemap lookup failed: %s", u_errorName(status))));
+
+ status = U_ZERO_ERROR;
+ needed = ucasemap_utf8ToLower(casemap, dest, destsize, src, srclen, &status);
+ ucasemap_close(casemap);
+ if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("case conversion failed: %s", u_errorName(status))));
+ return needed;
+ }
+ else
+ {
+ int32_t len_uchar;
+ int32_t len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+ size_t result_len;
+
+ len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
+ len_conv = icu_convert_case_uchar(u_strToLower, locale, &buff_conv,
+ buff_uchar, len_uchar);
+ result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
+ pfree(buff_uchar);
+ pfree(buff_conv);
+
+ return result_len;
+ }
}
size_t
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
- size_t result_len;
-
- len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
- len_conv = icu_convert_case(u_strToTitle_default_BI, locale,
- &buff_conv, buff_uchar, len_uchar);
- result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
-
- return result_len;
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ UCaseMap *casemap;
+ int32_t needed;
+
+ casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("casemap lookup failed: %s", u_errorName(status))));
+
+ status = U_ZERO_ERROR;
+ needed = ucasemap_utf8ToTitle(casemap, dest, destsize, src, srclen, &status);
+ ucasemap_close(casemap);
+ if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("case conversion failed: %s", u_errorName(status))));
+ return needed;
+ }
+ else
+ {
+ int32_t len_uchar;
+ int32_t len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+ size_t result_len;
+
+ len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
+ len_conv = icu_convert_case_uchar(u_strToTitle_default_BI, locale, &buff_conv,
+ buff_uchar, len_uchar);
+ result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
+ pfree(buff_uchar);
+ pfree(buff_conv);
+
+ return result_len;
+ }
}
size_t
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
- size_t result_len;
-
- len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
- len_conv = icu_convert_case(u_strToUpper, locale,
- &buff_conv, buff_uchar, len_uchar);
- result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
-
- return result_len;
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ UCaseMap *casemap;
+ int32_t needed;
+
+ casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("casemap lookup failed: %s", u_errorName(status))));
+
+ status = U_ZERO_ERROR;
+ needed = ucasemap_utf8ToUpper(casemap, dest, destsize, src, srclen, &status);
+ ucasemap_close(casemap);
+ if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("case conversion failed: %s", u_errorName(status))));
+ return needed;
+ }
+ else
+ {
+ int32_t len_uchar;
+ int32_t len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+ size_t result_len;
+
+ len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
+ len_conv = icu_convert_case_uchar(u_strToUpper, locale, &buff_conv,
+ buff_uchar, len_uchar);
+ result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
+ pfree(buff_uchar);
+ pfree(buff_conv);
+
+ return result_len;
+ }
}
/*
@@ -599,8 +666,8 @@ icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len
}
static int32_t
-icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
- UChar **buff_dest, UChar *buff_source, int32_t len_source)
+icu_convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
+ UChar **buff_dest, UChar *buff_source, int32_t len_source)
{
UErrorCode status;
int32_t len_dest;
--
2.45.2