fast_makesign_v2.patch
text/x-patch
Filename: fast_makesign_v2.patch
Type: text/x-patch
Part: 0
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index b328a09f41fee50beb96a28835e15ef835222cd6..7cea6d68fdcde71e0fb033682d77c21978c3406b 100644
*** a/contrib/pg_trgm/trgm_gist.c
--- b/contrib/pg_trgm/trgm_gist.c
*************** gtrgm_out(PG_FUNCTION_ARGS)
*** 84,100 ****
static void
makesign(BITVECP sign, TRGM *a)
{
! int4 k,
! len = ARRNELEM(a);
trgm *ptr = GETARR(a);
! int4 tmp = 0;
MemSet((void *) sign, 0, sizeof(BITVEC));
SETBIT(sign, SIGLENBIT); /* set last unused bit */
! for (k = 0; k < len; k++)
{
! CPTRGM(((char *) &tmp), ptr + k);
! HASH(sign, tmp);
}
}
--- 84,178 ----
static void
makesign(BITVECP sign, TRGM *a)
{
! int4 len = ARRNELEM(a);
trgm *ptr = GETARR(a);
! char *p;
! char *endptr;
! uint32 w1,
! w2,
! w3;
! uint32 trg0 = 0,
! trg1,
! trg2,
! trg3,
! trg4;
! uint32 *p32;
MemSet((void *) sign, 0, sizeof(BITVEC));
SETBIT(sign, SIGLENBIT); /* set last unused bit */
!
! if (len <= 0)
! return;
!
! /*----------
! * We have to extract each trigram into a uint32, and calculate the HASH.
! * This would be a lot easier if the trigrams were aligned on 4-byte
! * boundaries, but they're not. The simple way would be to copy each
! * trigram byte-by-byte, but that is quite slow, and this function is a
! * hotspot in penalty calculations.
! *
! * The first trigram in the array doesn't begin at a 4-byte boundary, as
! * the flags byte comes first; but the next one does. So we fetch the
! * first trigram as a special case, and after that each four trigrams fall
! * onto 4-byte words like this:
! *
! * w1 w2 w3
! * AAAB BBCC CDDD
! *
! * As long as there's at least four trigrams left to process, we fetch
! * the next three words and extract the trigrams from them with bit
! * operations, per the above diagram. The last few trigrams are handled
! * one at a time with byte-by-byte fetching.
! *
! * Note that this code yields different results on big-endian and
! * little-endian machines, because the bytes of each trigram are loaded
! * into a uint32 in memory order and left-justified. That's probably
! * undesirable, but changing this behavior would break existing indexes.
! *----------
! */
! endptr = (char *) (ptr + len);
! p32 = (uint32 *) (((char *) ptr) - 1);
!
! /* Fetch and extract the initial word */
! w1 = *(p32++);
! #ifdef WORDS_BIGENDIAN
! trg1 = w1 << 8;
! #else
! trg1 = w1 >> 8;
! #endif
! HASH(sign, trg1);
!
! while ((char *) p32 <= endptr - 3 * sizeof(uint32))
{
! w1 = *(p32++);
! w2 = *(p32++);
! w3 = *(p32++);
!
! #ifdef WORDS_BIGENDIAN
! trg1 = w1 & 0xFFFFFF00;
! trg2 = (w1 << 24) | ((w2 & 0xFFFF0000) >> 8);
! trg3 = ((w2 & 0x0000FFFF) << 16) | ((w3 & 0xFF000000) >> 16);
! trg4 = w3 << 8;
! #else
! trg1 = w1 & 0x00FFFFFF;
! trg2 = (w1 >> 24) | ((w2 & 0x0000FFFF) << 8);
! trg3 = ((w2 & 0xFFFF0000) >> 16) | ((w3 & 0x000000FF) << 16);
! trg4 = w3 >> 8;
! #endif
!
! HASH(sign, trg1);
! HASH(sign, trg2);
! HASH(sign, trg3);
! HASH(sign, trg4);
! }
!
! /* Handle the remaining 0-3 trigrams the slow way */
! p = (char *) p32;
! while (p < endptr)
! {
! CPTRGM(((char *) &trg0), p);
! HASH(sign, trg0);
! p += 3;
}
}