v5-0002-JCN-changes.patch
application/octet-stream
Filename: v5-0002-JCN-changes.patch
Type: application/octet-stream
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v5-0002
Subject: JCN changes
| File | + | − |
|---|---|---|
| src/backend/utils/mb/Unicode/UCS_to_GB18030.pl | 11 | 27 |
From 0a2ab84b481acc81b974e6049c03fe3dea56e728 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 8 Sep 2025 15:29:01 +0700
Subject: [PATCH v5 2/3] JCN changes
---
.../utils/mb/Unicode/UCS_to_GB18030.pl | 38 ++++++-------------
1 file changed, 11 insertions(+), 27 deletions(-)
diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
index 658e0d59e2c..084fdf66af1 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -4,17 +4,15 @@
#
# src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
#
-
# Generate UTF-8 <--> GB18030 code conversion tables from
-# "gb-18030-2000.ucm", a Unicode Character Mapping file (UCM) from ICU,
-# obtained from https://github.com/unicode-org/icu-data/blob/d9d3a6ed27bb98a7106763e940258f0be8cd995b/charset/data/ucm/gb-18030-2000.ucm
+# "gb-18030-2000.ucm", obtained from
+# https://github.com/unicode-org/icu-data/tree/main/charset/data/ucm
#
# The lines we care about in the source file look like
# <UXXXX> \xYY[\xYY...] |n
-# where <UXXXX> is the Unicode code point in hex,
+# where XXXX is the Unicode code point in hex,
# and the \xYY... is the hex byte sequence for GB18030,
# and n is a flag indicating the type of mapping.
-#
use strict;
use warnings FATAL => 'all';
@@ -30,37 +28,23 @@ my $in_file = "gb-18030-2000.ucm";
open(my $in, '<', $in_file) || die("cannot open $in_file");
my @mapping;
-my $in_charmap = 0;
while (<$in>)
{
- chomp;
- # Enter CHARMAP section
- if (/^CHARMAP/) {
- $in_charmap = 1;
- next;
- }
- # Exit CHARMAP section
- if (/^END CHARMAP/) {
- $in_charmap = 0;
- last;
- }
- next unless $in_charmap;
- # Skip comments and empty lines
- next if /^#/ || /^$/;
+ # Skip comment lines; mappings may have been removed by commenting them out
+ next if /^#/;
- # Match lines like: <UXXXX> \xYY[\xYY...] |n
- next if !/^<U([0-9A-Fa-f]+)>\s+((?:\\x[0-9A-Fa-f]{2})+)\s*\|(\d+)/;
+ next if !/^<U([0-9A-Fa-f]+)>\s+
+ ((?:\\x[0-9A-Fa-f]{2})+)\s+
+ \|(\d+)/x;
my ($u, $c, $flag) = ($1, $2, $3);
+ $c =~ s/\\x//g;
- # flag 0 means round-trip mapping, we only care about that
+ # We only want round-trip mappings, which are marked with flag 0
next if ($flag ne '0');
my $ucs = hex($u);
- # Remove \x and concatenate bytes
- my $c_hex = $c;
- $c_hex =~ s/\\x//g;
- my $code = hex($c_hex);
+ my $code = hex($c);
if ($code >= 0x80 && $ucs >= 0x0080)
{
push @mapping,
--
2.39.5 (Apple Git-154)