fast_makesign.patch
text/x-diff
Filename: fast_makesign.patch
Type: text/x-diff
Part: 0
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index b328a09..1f3d3e3 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -84,17 +84,88 @@ gtrgm_out(PG_FUNCTION_ARGS)
static void
makesign(BITVECP sign, TRGM *a)
{
- int4 k,
- len = ARRNELEM(a);
+ int4 len = ARRNELEM(a);
trgm *ptr = GETARR(a);
- int4 tmp = 0;
+ char *p;
+ char *endptr;
+ uint32 w1,
+ w2,
+ w3;
+ uint32 trg1,
+ trg2,
+ trg3,
+ trg4;
+ uint32 *p32;
MemSet((void *) sign, 0, sizeof(BITVEC));
SETBIT(sign, SIGLENBIT); /* set last unused bit */
- for (k = 0; k < len; k++)
+
+ if (len == 0)
+ return;
+
+ endptr = (char *) (ptr + len);
+ /*------------------------------------------------------------------------
+ * We have to extract each trigram into a uint32, and calculate the HASH.
+ * This would be a lot easier if each trigram was aligned at 4-byte
+ * boundary, but they're not. The simple way would be to copy each
+ * trigram byte-per-byte, but that is quite slow, and this function is a
+ * hotspot in penalty calculation.
+ *
+ * The first trigram in the array doesn't begin at a 4-byte boundary, the
+ * flags byte comes first, but the next one does. So we fetch the first
+ * trigram as a special case, and after that the following four trigrams
+ * fall onto 4-byte words like this:
+ *
+ * w1 w2 w3
+ * AAAB BBCC CDDD
+ *
+ * As long as there's at least four trigrams left to process, we fetch
+ * the next three words and extract the trigrams from them with bit
+ * operations.
+ *------------------------------------------------------------------------
+ */
+ p32 = (uint32 *) (((char *) ptr) - 1);
+
+ /* Fetch and extract the initial word */
+ w1 = *(p32++);
+#ifdef WORDS_BIGENDIAN
+ trg1 = w1 << 8;
+#else
+ trg1 = w1 >> 8;
+#endif
+ HASH(sign, trg1);
+
+ while((char *) p32 < endptr - 12)
{
- CPTRGM(((char *) &tmp), ptr + k);
- HASH(sign, tmp);
+ w1 = *(p32++);
+ w2 = *(p32++);
+ w3 = *(p32++);
+
+#ifdef WORDS_BIGENDIAN
+ trg1 = w1 & 0xFFFFFF00;
+ trg2 = (w1 << 24) | ((w2 & 0xFFFF0000) >> 8);
+ trg3 = ((w2 & 0x0000FFFF) << 16) | ((w3 & 0xFF000000) >> 16);
+ trg4 = w3 << 8;
+#else
+ trg1 = w1 & 0x00FFFFFF;
+ trg2 = (w1 >> 24) | ((w2 & 0x0000FFFF) << 8);
+ trg3 = ((w2 & 0xFFFF0000) >> 16) | ((w3 & 0x000000FF) << 16);
+ trg4 = w3 >> 8;
+#endif
+
+ HASH(sign, trg1);
+ HASH(sign, trg2);
+ HASH(sign, trg3);
+ HASH(sign, trg4);
+ }
+
+ /* Handle remaining 1-3 trigrams the slow way */
+ p = (char *) p32;
+ while (p < endptr)
+ {
+ CPTRGM(((char *) &trg1), p);
+ HASH(sign, trg1);
+ p += 3;
}
}