COPY-SIMD-add-heuristic-to-avoid-regression-on-sm.txt
text/plain
Filename: COPY-SIMD-add-heuristic-to-avoid-regression-on-sm.txt
Type: text/plain
Part: 0
From aa55843b0c64bed9f72cf8cd7854df9df7ef989b Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 19 Aug 2025 15:16:02 +0300
Subject: [PATCH v1] COPY SIMD: add heuristic to avoid regression on small
advances
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When the SIMD path advances fewer than 5 characters in a chunk, performance
regresses. To mitigate this, introduce a heuristic:
- If advance < 5 -> temporarily skip the SIMD path (a "sleep" penalty of n cycles).
- Each time advance < 5, n is doubled (capped at PG_INT16_MAX).
- Each time advance ≥ 5, or a whole chunk is skipped, n is halved (minimum 1).
---
src/backend/commands/copyfromparse.c | 42 ++++++++++++++++++++++++++--
1 file changed, 40 insertions(+), 2 deletions(-)
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 5aba0fa6cb7..e58d7d4e353 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -1263,6 +1263,9 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
Vector8 bs = vector8_broadcast('\\');
Vector8 quote;
Vector8 escape;
+
+ int sleep_cyle = 0;
+ int last_sleep_cyle = 1;
#endif
if (is_csv)
@@ -1359,7 +1362,7 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
* vector register, as SIMD operations require processing data in
* fixed-size chunks.
*/
- if (!in_quote && copy_buf_len - input_buf_ptr >= sizeof(Vector8))
+ if (sleep_cyle <= 0 && !in_quote && copy_buf_len - input_buf_ptr >= sizeof(Vector8))
{
Vector8 chunk;
Vector8 match;
@@ -1390,14 +1393,49 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
*/
int advance = pg_rightmost_one_pos32(mask);
input_buf_ptr += advance;
+
+ /*
+ * If we advance fewer than 5 characters, SIMD causes a regression.
+ * Skip the SIMD path for a while ("sleep"), then try again. The
+ * sleep time increases exponentially.
+ */
+ if (advance < 5)
+ {
+ if (last_sleep_cyle >= PG_INT16_MAX / 2)
+ last_sleep_cyle = PG_INT16_MAX;
+ else
+ last_sleep_cyle = last_sleep_cyle << 1;
+
+ sleep_cyle = last_sleep_cyle;
+ }
+
+ /*
+ * If we advance more than 4 characters this means we have
+ * performance improvement. Halve sleep time for next sleep.
+ */
+ else
+ {
+ last_sleep_cyle = Max(last_sleep_cyle >> 1, 1);
+ sleep_cyle = 0;
+ }
}
else
{
- /* No special characters found, so skip the entire chunk */
+ /*
+ * No special characters found, so skip the entire chunk and
+ * halve sleep time for next sleep.
+ */
input_buf_ptr += sizeof(Vector8);
+ last_sleep_cyle = Max(last_sleep_cyle >> 1, 1);
continue;
}
}
+
+ /*
+ * Vulnerable to underflow: this is decremented on every iteration, even
+ * while in quote; sleep_cyle is an int, so the limit is not INT16_MAX.
+ */
+ sleep_cyle--;
#endif
/* OK to fetch a character */
--
2.50.1