v4.2-0003-Feedback-Changes.patch
text/x-patch
Filename: v4.2-0003-Feedback-Changes.patch
Type: text/x-patch
Part: 2
From 128574f80963c5b532c8aa7e7fad84a7e6e20874 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 9 Dec 2025 15:32:10 +0300
Subject: [PATCH v4.2 3/3] Feedback / Changes
---
src/include/commands/copyfrom_internal.h | 9 +--
src/backend/commands/copyfrom.c | 1 +
src/backend/commands/copyfromparse.c | 92 +++++++++++++++---------
3 files changed, 65 insertions(+), 37 deletions(-)
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 215215f909f..397720bf875 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -183,12 +183,13 @@ typedef struct CopyFromStateData
uint64 bytes_processed; /* number of bytes processed so far */
/* the amount of bytes to read until checking if we should try simd */
-#define BYTES_PROCESSED_UNTIL_SIMD_CHECK 100000
- /* the number of special chars read below which we use simd */
-#define SPECIAL_CHAR_SIMD_THRESHOLD 20000
+#define CHARS_PROCESSED_UNTIL_SIMD_CHECK 100000
+ /* use simd if at most 1 in SPECIAL_CHAR_SIMD_RATIO chars is special */
+#define SPECIAL_CHAR_SIMD_RATIO 4
+ uint64 chars_processed;
uint64 special_chars_encountered; /* number of special chars
* encountered so far */
- bool checked_simd; /* we read BYTES_PROCESSED_UNTIL_SIMD_CHECK
+ bool checked_simd; /* we read CHARS_PROCESSED_UNTIL_SIMD_CHECK
* and checked if we should use SIMD on the
* rest of the file */
bool use_simd; /* use simd to speed up copying */
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 6711c0cfcdd..2b77ba2556c 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1720,6 +1720,7 @@ BeginCopyFrom(ParseState *pstate,
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
cstate->relname_only = false;
+ cstate->chars_processed = 0;
cstate->special_chars_encountered = 0;
cstate->checked_simd = false;
cstate->use_simd = false;
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index d548674c8ff..720222152da 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -143,7 +143,7 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
/* non-export function prototypes */
static bool CopyReadLine(CopyFromState cstate, bool is_csv);
-static bool CopyReadLineText(CopyFromState cstate, bool is_csv);
+static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate, bool is_csv, bool use_simd);
static int CopyReadAttributesText(CopyFromState cstate);
static int CopyReadAttributesCSV(CopyFromState cstate);
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
@@ -1173,8 +1173,40 @@ CopyReadLine(CopyFromState cstate, bool is_csv)
resetStringInfo(&cstate->line_buf);
cstate->line_buf_valid = false;
- /* Parse data and transfer into line_buf */
- result = CopyReadLineText(cstate, is_csv);
+#ifndef USE_NO_SIMD
+
+ /*
+ * Wait until we have read more than CHARS_PROCESSED_UNTIL_SIMD_CHECK.
+ * cstate->chars_processed will grow an unpredictable amount with each
+ * call to this function, so just wait until we have crossed the
+ * threshold.
+ */
+ if (!cstate->checked_simd && cstate->chars_processed > CHARS_PROCESSED_UNTIL_SIMD_CHECK)
+ {
+ cstate->checked_simd = true;
+
+ /*
+ * If we have not read too many special characters then start using
+ * SIMD to speed up processing. This heuristic assumes that input does
+ * not vary too much from line to line and that the number of special
+ * characters encountered in the first
+ * CHARS_PROCESSED_UNTIL_SIMD_CHECK is indicative of the whole file.
+ */
+ if (cstate->chars_processed / SPECIAL_CHAR_SIMD_RATIO >= cstate->special_chars_encountered)
+ {
+ cstate->use_simd = true;
+ }
+ }
+#endif
+
+ /*
+ * Parse data and transfer into line_buf. To get benefit from inlining,
+ * call CopyReadLineText() with the constant boolean variables.
+ */
+ if (cstate->use_simd)
+ result = CopyReadLineText(cstate, is_csv, true);
+ else
+ result = CopyReadLineText(cstate, is_csv, false);
if (result)
{
@@ -1241,11 +1273,12 @@ CopyReadLine(CopyFromState cstate, bool is_csv)
/*
* CopyReadLineText - inner loop of CopyReadLine for text mode
*/
-static bool
-CopyReadLineText(CopyFromState cstate, bool is_csv)
+static pg_attribute_always_inline bool
+CopyReadLineText(CopyFromState cstate, bool is_csv, bool use_simd)
{
char *copy_input_buf;
int input_buf_ptr;
+ int start_input_buf_ptr;
int copy_buf_len;
bool need_data = false;
bool hit_eof = false;
@@ -1309,6 +1342,7 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
input_buf_ptr = cstate->input_buf_index;
copy_buf_len = cstate->input_buf_len;
+ start_input_buf_ptr = input_buf_ptr;
for (;;)
{
int prev_raw_ptr;
@@ -1327,9 +1361,11 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
REFILL_LINEBUF;
CopyLoadInputBuf(cstate);
+ cstate->chars_processed += (input_buf_ptr - start_input_buf_ptr);
/* update our local variables */
hit_eof = cstate->input_reached_eof;
input_buf_ptr = cstate->input_buf_index;
+ start_input_buf_ptr = input_buf_ptr;
copy_buf_len = cstate->input_buf_len;
/*
@@ -1346,28 +1382,6 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
#ifndef USE_NO_SIMD
- /*
- * Wait until we have read more than BYTES_PROCESSED_UNTIL_SIMD_CHECK.
- * cstate->bytes_processed will grow an unpredictable amount with each
- * call to this function, so just wait until we have crossed the
- * threshold.
- */
- if (!cstate->checked_simd && cstate->bytes_processed > BYTES_PROCESSED_UNTIL_SIMD_CHECK)
- {
- cstate->checked_simd = true;
-
- /*
- * If we have not read too many special characters
- * (SPECIAL_CHAR_SIMD_THRESHOLD) then start using SIMD to speed up
- * processing. This heuristic assumes that input does not vary too
- * much from line to line and that number of special characters
- * encountered in the first BYTES_PROCESSED_UNTIL_SIMD_CHECK are
- * indicitive of the whole file.
- */
- if (cstate->special_chars_encountered < SPECIAL_CHAR_SIMD_THRESHOLD)
- cstate->use_simd = true;
- }
-
/*
* Use SIMD instructions to efficiently scan the input buffer for
* special characters (e.g., newline, carriage return, quote, and
@@ -1380,7 +1394,7 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
* sequentially. - The remaining buffer is smaller than one vector
* width (sizeof(Vector8)); SIMD operates on fixed-size chunks.
*/
- if (cstate->use_simd && !last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8))
+ if (use_simd && !last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8))
{
Vector8 chunk;
Vector8 match = vector8_broadcast(0);
@@ -1427,6 +1441,21 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
prev_raw_ptr = input_buf_ptr;
c = copy_input_buf[input_buf_ptr++];
+ /* Use this calculation to decide whether to use SIMD later */
+ if (!use_simd && unlikely(!cstate->checked_simd))
+ {
+ if (is_csv)
+ {
+ if (c == '\r' || c == '\n' || c == quotec || c == escapec)
+ cstate->special_chars_encountered++;
+ }
+ else
+ {
+ if (c == '\r' || c == '\n' || c == '\\')
+ cstate->special_chars_encountered++;
+ }
+ }
+
if (is_csv)
{
/*
@@ -1437,7 +1466,6 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
*/
if (c == '\r')
{
- cstate->special_chars_encountered++;
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
}
@@ -1469,7 +1497,6 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
/* Process \r */
if (c == '\r' && (!is_csv || !in_quote))
{
- cstate->special_chars_encountered++;
/* Check for \r\n on first line, _and_ handle \r\n. */
if (cstate->eol_type == EOL_UNKNOWN ||
cstate->eol_type == EOL_CRNL)
@@ -1526,7 +1553,6 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
/* Process \n */
if (c == '\n' && (!is_csv || !in_quote))
{
- cstate->special_chars_encountered++;
if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
@@ -1549,8 +1575,6 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
{
char c2;
- cstate->special_chars_encountered++;
-
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
IF_NEED_REFILL_AND_EOF_BREAK(0);
@@ -1635,6 +1659,8 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
*/
REFILL_LINEBUF;
+ cstate->chars_processed += (input_buf_ptr - start_input_buf_ptr);
+
return result;
}
--
2.51.0