v12-0007-fuzzystrmatch-use-pg_ascii_toupper.patch
text/x-patch
Filename: v12-0007-fuzzystrmatch-use-pg_ascii_toupper.patch
Type: text/x-patch
Part: 6
From 8bea39a2780283d4afdd75e0eb4a01b50d524faf Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 19 Nov 2025 13:24:38 -0800
Subject: [PATCH v12 7/8] fuzzystrmatch: use pg_ascii_toupper().
fuzzystrmatch is designed for ASCII, so there is no need to rely on the
global LC_CTYPE setting.
TODO: what about the \xc7 case? Also, what should the behavior be for
soundex()?
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
---
contrib/fuzzystrmatch/dmetaphone.c | 2 +-
contrib/fuzzystrmatch/fuzzystrmatch.c | 43 +++++++++++++++------------
2 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/contrib/fuzzystrmatch/dmetaphone.c b/contrib/fuzzystrmatch/dmetaphone.c
index 227d8b11ddc..5e8ee2b0354 100644
--- a/contrib/fuzzystrmatch/dmetaphone.c
+++ b/contrib/fuzzystrmatch/dmetaphone.c
@@ -284,7 +284,7 @@ MakeUpper(metastring *s)
char *i;
for (i = s->str; *i; i++)
- *i = toupper((unsigned char) *i);
+ *i = pg_ascii_toupper((unsigned char) *i);
}
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index e7cc314b763..319302af0e4 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -62,7 +62,7 @@ static const char *const soundex_table = "01230120022455012623010202";
static char
soundex_code(char letter)
{
- letter = toupper((unsigned char) letter);
+ letter = pg_ascii_toupper((unsigned char) letter);
/* Defend against non-ASCII letters */
if (letter >= 'A' && letter <= 'Z')
return soundex_table[letter - 'A'];
@@ -122,16 +122,21 @@ static const char _codes[26] = {
static int
getcode(char c)
{
- if (isalpha((unsigned char) c))
- {
- c = toupper((unsigned char) c);
- /* Defend against non-ASCII letters */
- if (c >= 'A' && c <= 'Z')
- return _codes[c - 'A'];
- }
+ c = pg_ascii_toupper((unsigned char) c);
+ /* Defend against non-ASCII letters */
+ if (c >= 'A' && c <= 'Z')
+ return _codes[c - 'A'];
+
return 0;
}
+static bool
+ascii_isalpha(char c)
+{
+ return (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z');
+}
+
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
/* These letters are passed through unchanged */
@@ -301,18 +306,18 @@ metaphone(PG_FUNCTION_ARGS)
* accessing the array directly... */
/* Look at the next letter in the word */
-#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
+#define Next_Letter (pg_ascii_toupper((unsigned char) word[w_idx+1]))
/* Look at the current letter in the word */
-#define Curr_Letter (toupper((unsigned char) word[w_idx]))
+#define Curr_Letter (pg_ascii_toupper((unsigned char) word[w_idx]))
/* Go N letters back. */
#define Look_Back_Letter(n) \
- (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
+ (w_idx >= (n) ? pg_ascii_toupper((unsigned char) word[w_idx-(n)]) : '\0')
/* Previous letter. I dunno, should this return null on failure? */
#define Prev_Letter (Look_Back_Letter(1))
/* Look two letters down. It makes sure you don't walk off the string. */
#define After_Next_Letter \
- (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
-#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
+ (Next_Letter != '\0' ? pg_ascii_toupper((unsigned char) word[w_idx+2]) : '\0')
+#define Look_Ahead_Letter(n) pg_ascii_toupper((unsigned char) Lookahead(word+w_idx, n))
/* Allows us to safely look ahead an arbitrary # of letters */
@@ -340,7 +345,7 @@ Lookahead(char *word, int how_far)
#define Phone_Len (p_idx)
/* Note is a letter is a 'break' in the word */
-#define Isbreak(c) (!isalpha((unsigned char) (c)))
+#define Isbreak(c) (!ascii_isalpha((unsigned char) (c)))
static void
@@ -379,7 +384,7 @@ _metaphone(char *word, /* IN */
/*-- The first phoneme has to be processed specially. --*/
/* Find our first letter */
- for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)
+ for (; !ascii_isalpha((unsigned char) (Curr_Letter)); w_idx++)
{
/* On the off chance we were given nothing but crap... */
if (Curr_Letter == '\0')
@@ -478,7 +483,7 @@ _metaphone(char *word, /* IN */
*/
/* Ignore non-alphas */
- if (!isalpha((unsigned char) (Curr_Letter)))
+ if (!ascii_isalpha((unsigned char) (Curr_Letter)))
continue;
/* Drop duplicates, except CC */
@@ -731,7 +736,7 @@ _soundex(const char *instr, char *outstr)
Assert(outstr);
/* Skip leading non-alphabetic characters */
- while (*instr && !isalpha((unsigned char) *instr))
+ while (*instr && !ascii_isalpha((unsigned char) *instr))
++instr;
/* If no string left, return all-zeroes buffer */
@@ -742,12 +747,12 @@ _soundex(const char *instr, char *outstr)
}
/* Take the first letter as is */
- *outstr++ = (char) toupper((unsigned char) *instr++);
+ *outstr++ = (char) pg_ascii_toupper((unsigned char) *instr++);
count = 1;
while (*instr && count < SOUNDEX_LEN)
{
- if (isalpha((unsigned char) *instr) &&
+ if (ascii_isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
*outstr = soundex_code(*instr);
--
2.43.0