fast_makesign_v3.patch
text/x-patch
Filename: fast_makesign_v3.patch
Type: text/x-patch
Part: 0
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index b328a09f41fee50beb96a28835e15ef835222cd6..e024caceeed0d656e8569d3a8f1bd2c1b6d72821 100644
*** a/contrib/pg_trgm/trgm_gist.c
--- b/contrib/pg_trgm/trgm_gist.c
*************** gtrgm_out(PG_FUNCTION_ARGS)
*** 84,100 ****
static void
makesign(BITVECP sign, TRGM *a)
{
! int4 k,
! len = ARRNELEM(a);
trgm *ptr = GETARR(a);
! int4 tmp = 0;
MemSet((void *) sign, 0, sizeof(BITVEC));
SETBIT(sign, SIGLENBIT); /* set last unused bit */
! for (k = 0; k < len; k++)
{
! CPTRGM(((char *) &tmp), ptr + k);
! HASH(sign, tmp);
}
}
--- 84,182 ----
static void
makesign(BITVECP sign, TRGM *a)
{
! int4 len = ARRNELEM(a);
trgm *ptr = GETARR(a);
! char *p;
! char *endptr;
! uint32 w1,
! w2,
! w3;
! uint32 trg0 = 0,
! trg1,
! trg2,
! trg3,
! trg4;
! uint32 *p32;
MemSet((void *) sign, 0, sizeof(BITVEC));
SETBIT(sign, SIGLENBIT); /* set last unused bit */
!
! if (len <= 0)
! return;
!
! /*----------
! * We have to extract each trigram into a uint32, and calculate the HASH.
! * This would be a lot easier if the trigrams were aligned on 4-byte
! * boundaries, but they're not. The simple way would be to copy each
! * trigram byte-by-byte, but that is quite slow, and this function is a
! * hotspot in penalty calculations.
! *
! * The first trigram in the array might not begin at a 4-byte boundary,
! * because the TRGM header is of odd length, and anyway we'd prefer not
! * to hard-wire an assumption about header size here. So we fetch
! * trigrams byte-by-byte until we reach one that does start on a 4-byte
! * boundary. After that, each four trigrams fall onto 4-byte words like
! * this:
! *
! * w1 w2 w3
! * AAAB BBCC CDDD
! *
! * As long as there's at least four trigrams left to process, we fetch
! * the next three words and extract the trigrams from them with bit
! * operations, per the above diagram. The last few trigrams are handled
! * one at a time with byte-by-byte fetching.
! *
! * Note that this code yields different results on big-endian and
! * little-endian machines, because the bytes of each trigram are loaded
! * into a uint32 in memory order and left-justified. That's probably
! * undesirable, but changing this behavior would break existing indexes.
! *----------
! */
! p = (char *) ptr;
! endptr = (char *) (ptr + len);
!
! /* Handle trigrams the slow way until we reach a uint32 boundary */
! while (p < endptr &&
! ((intptr_t) p % sizeof(uint32)) != 0)
{
! CPTRGM(((char *) &trg0), p);
! HASH(sign, trg0);
! p += 3;
! }
!
! /* Now handle trigrams in groups of four */
! p32 = (uint32 *) p;
! while ((char *) p32 <= endptr - 3 * sizeof(uint32))
! {
! w1 = *(p32++);
! w2 = *(p32++);
! w3 = *(p32++);
!
! #ifdef WORDS_BIGENDIAN
! trg1 = w1 & 0xFFFFFF00;
! trg2 = (w1 << 24) | ((w2 & 0xFFFF0000) >> 8);
! trg3 = ((w2 & 0x0000FFFF) << 16) | ((w3 & 0xFF000000) >> 16);
! trg4 = w3 << 8;
! #else
! trg1 = w1 & 0x00FFFFFF;
! trg2 = (w1 >> 24) | ((w2 & 0x0000FFFF) << 8);
! trg3 = ((w2 & 0xFFFF0000) >> 16) | ((w3 & 0x000000FF) << 16);
! trg4 = w3 >> 8;
! #endif
!
! HASH(sign, trg1);
! HASH(sign, trg2);
! HASH(sign, trg3);
! HASH(sign, trg4);
! }
!
! /* Handle the remaining 0-3 trigrams the slow way */
! p = (char *) p32;
! while (p < endptr)
! {
! CPTRGM(((char *) &trg0), p);
! HASH(sign, trg0);
! p += 3;
}
}