fast_makesign-tbl.patch
text/x-diff
Filename: fast_makesign-tbl.patch
Type: text/x-diff
Part: 0
*** a/contrib/pg_trgm/trgm_gist.c
--- b/contrib/pg_trgm/trgm_gist.c
***************
*** 82,88 **** gtrgm_out(PG_FUNCTION_ARGS)
}
static void
! makesign(BITVECP sign, TRGM *a)
{
int4 k,
len = ARRNELEM(a);
--- 82,88 ----
}
static void
! origmakesign(BITVECP sign, TRGM *a)
{
int4 k,
len = ARRNELEM(a);
***************
*** 98,103 **** makesign(BITVECP sign, TRGM *a)
--- 98,316 ----
}
}
+ /*
+ * Lookup tables for setting nth bit in a 128-bit bit array, where the
+ * bit array is stored in two 64-bit integers. That representation is used
+ * inside makesign(), elsewhere the signature is stored in a BITVEC, ie.
+ * a byte array, in little-endian order. To be compatible with the byte
+ * array representation, the 64-bit integers are in little-endian order
+ * regardless of the native endianness of the platform.
+ */
+ #ifdef WORDS_BIGENDIAN
+ #define SET_NTH_BIT_LOOKUP \
+ 1LL<<0x38, 1LL<<0x39, 1LL<<0x3A, 1LL<<0x3B, \
+ 1LL<<0x3C, 1LL<<0x3D, 1LL<<0x3E, 1LL<<0x3F, \
+ \
+ 1LL<<0x30, 1LL<<0x31, 1LL<<0x32, 1LL<<0x33, \
+ 1LL<<0x34, 1LL<<0x35, 1LL<<0x36, 1LL<<0x37, \
+ \
+ 1LL<<0x28, 1LL<<0x29, 1LL<<0x2A, 1LL<<0x2B, \
+ 1LL<<0x2C, 1LL<<0x2D, 1LL<<0x2E, 1LL<<0x2F, \
+ \
+ 1LL<<0x20, 1LL<<0x21, 1LL<<0x22, 1LL<<0x23, \
+ 1LL<<0x24, 1LL<<0x25, 1LL<<0x26, 1LL<<0x27, \
+ \
+ 1LL<<0x18, 1LL<<0x19, 1LL<<0x1A, 1LL<<0x1B, \
+ 1LL<<0x1C, 1LL<<0x1D, 1LL<<0x1E, 1LL<<0x1F, \
+ \
+ 1LL<<0x10, 1LL<<0x11, 1LL<<0x12, 1LL<<0x13, \
+ 1LL<<0x14, 1LL<<0x15, 1LL<<0x16, 1LL<<0x17, \
+ \
+ 1LL<<0x08, 1LL<<0x09, 1LL<<0x0A, 1LL<<0x0B, \
+ 1LL<<0x0C, 1LL<<0x0D, 1LL<<0x0E, 1LL<<0x0F, \
+ \
+ 1LL<<0x00, 1LL<<0x01, 1LL<<0x02, 1LL<<0x03, \
+ 1LL<<0x04, 1LL<<0x05, 1LL<<0x06, 1LL<<0x07
+ #else
+ #define SET_NTH_BIT_LOOKUP \
+ 1LL<<0x00, 1LL<<0x01, 1LL<<0x02, 1LL<<0x03, \
+ 1LL<<0x04, 1LL<<0x05, 1LL<<0x06, 1LL<<0x07, \
+ \
+ 1LL<<0x08, 1LL<<0x09, 1LL<<0x0A, 1LL<<0x0B, \
+ 1LL<<0x0C, 1LL<<0x0D, 1LL<<0x0E, 1LL<<0x0F, \
+ \
+ 1LL<<0x10, 1LL<<0x11, 1LL<<0x12, 1LL<<0x13, \
+ 1LL<<0x14, 1LL<<0x15, 1LL<<0x16, 1LL<<0x17, \
+ \
+ 1LL<<0x18, 1LL<<0x19, 1LL<<0x1A, 1LL<<0x1B, \
+ 1LL<<0x1C, 1LL<<0x1D, 1LL<<0x1E, 1LL<<0x1F, \
+ \
+ 1LL<<0x20, 1LL<<0x21, 1LL<<0x22, 1LL<<0x23, \
+ 1LL<<0x24, 1LL<<0x25, 1LL<<0x26, 1LL<<0x27, \
+ \
+ 1LL<<0x28, 1LL<<0x29, 1LL<<0x2A, 1LL<<0x2B, \
+ 1LL<<0x2C, 1LL<<0x2D, 1LL<<0x2E, 1LL<<0x2F, \
+ \
+ 1LL<<0x30, 1LL<<0x31, 1LL<<0x32, 1LL<<0x33, \
+ 1LL<<0x34, 1LL<<0x35, 1LL<<0x36, 1LL<<0x37, \
+ \
+ 1LL<<0x38, 1LL<<0x39, 1LL<<0x3A, 1LL<<0x3B, \
+ 1LL<<0x3C, 1LL<<0x3D, 1LL<<0x3E, 1LL<<0x3F
+ #endif
+
+ /* Lookup tables for setting the low and high 64-bits of the signature. */
+ static const uint64 lowsbox[128] =
+ {
+ /* 0-63 */
+ SET_NTH_BIT_LOOKUP,
+
+ /* 64-SIGLENBIT are zeros */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+
+ static const uint64 highsbox[128] =
+ {
+ /* 0-63 are zeros. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ /* 64-SIGLENBIT */
+ SET_NTH_BIT_LOOKUP,
+ };
+
+ /* Set the nth bit in the signature, stored in two uint64s. */
+ #define SETBIT_S(high, low, hash) \
+ do { \
+ unsigned int n = (hash); \
+ high |= highsbox[n]; \
+ low |= lowsbox[n]; \
+ } while(0)
+
+ static void
+ makesign(BITVECP sign, TRGM *a)
+ {
+ int4 len = ARRNELEM(a);
+ trgm *ptr = GETARR(a);
+ char *p;
+ char *endptr;
+ uint32 w1,
+ w2,
+ w3;
+ uint32 trg0 = 0,
+ trg1,
+ trg2,
+ trg3,
+ trg4;
+ uint32 *p32;
+
+ /*
+ * s1 and s2 contain the signature we're calcuting. Together they
+ * form one bit-array of 128 bits.
+ */
+ uint64 highsig = 0;
+ uint64 lowsig = 0;
+
+ SETBIT_S(highsig, lowsig, SIGLENBIT); /* set last unused bit */
+
+ if (len <= 0)
+ goto end;
+
+ /*----------
+ * We have to extract each trigram into a uint32, and calculate the HASH.
+ * This would be a lot easier if the trigrams were aligned on 4-byte
+ * boundaries, but they're not. The simple way would be to copy each
+ * trigram byte-by-byte, but that is quite slow, and this function is a
+ * hotspot in penalty calculations.
+ *
+ * The first trigram in the array doesn't begin at a 4-byte boundary, as
+ * the flags byte comes first; but the next one does. So we fetch the
+ * first trigram as a special case, and after that each four trigrams fall
+ * onto 4-byte words like this:
+ *
+ * w1 w2 w3
+ * AAAB BBCC CDDD
+ *
+ * As long as there's at least four trigrams left to process, we fetch
+ * the next three words and extract the trigrams from them with bit
+ * operations, per the above diagram. The last few trigrams are handled
+ * one at a time with byte-by-byte fetching.
+ *
+ * Note that this code yields different results on big-endian and
+ * little-endian machines, because the bytes of each trigram are loaded
+ * into a uint32 in memory order and left-justified. That's probably
+ * undesirable, but changing this behavior would break existing indexes.
+ *----------
+ */
+ endptr = (char *) (ptr + len);
+ p32 = (uint32 *) (((char *) ptr) - 1);
+
+ /* Fetch and extract the initial word */
+ w1 = *(p32++);
+ #ifdef WORDS_BIGENDIAN
+ trg1 = w1 << 8;
+ #else
+ trg1 = w1 >> 8;
+ #endif
+ SETBIT_S(highsig, lowsig, HASHVAL(trg1));
+
+ while ((char *) p32 <= endptr - 3 * sizeof(uint32))
+ {
+ w1 = *(p32++);
+ w2 = *(p32++);
+ w3 = *(p32++);
+
+ #ifdef WORDS_BIGENDIAN
+ trg1 = w1 & 0xFFFFFF00;
+ trg2 = (w1 << 24) | ((w2 & 0xFFFF0000) >> 8);
+ trg3 = ((w2 & 0x0000FFFF) << 16) | ((w3 & 0xFF000000) >> 16);
+ trg4 = w3 << 8;
+ #else
+ trg1 = w1 & 0x00FFFFFF;
+ trg2 = (w1 >> 24) | ((w2 & 0x0000FFFF) << 8);
+ trg3 = ((w2 & 0xFFFF0000) >> 16) | ((w3 & 0x000000FF) << 16);
+ trg4 = w3 >> 8;
+ #endif
+
+ SETBIT_S(highsig, lowsig, HASHVAL(trg1));
+ SETBIT_S(highsig, lowsig, HASHVAL(trg2));
+ SETBIT_S(highsig, lowsig, HASHVAL(trg3));
+ SETBIT_S(highsig, lowsig, HASHVAL(trg4));
+ }
+
+ /* Handle the remaining 0-3 trigrams the slow way */
+ p = (char *) p32;
+ while (p < endptr)
+ {
+ CPTRGM(((char *) &trg0), p);
+ SETBIT_S(highsig, lowsig, HASHVAL(trg0));
+ p += 3;
+ }
+
+ end:
+ memcpy((char *) sign, &lowsig, sizeof(uint64));
+ memcpy(((char *) sign) + sizeof(uint64), &highsig, sizeof(uint64));
+
+ #ifdef VERIFY_SIG
+ {
+ BITVEC osign;
+ origmakesign(osign, a);
+
+ if (memcmp(sign, osign, sizeof(BITVEC)) != 0)
+ {
+ elog(LOG, "mismatch: %lx %lx", highsig, lowsig);
+ elog(LOG, "orig: %lx %lx", ((uint64 *)osign)[0], ((uint64 *)osign)[1]);
+ }
+ }
+ #endif
+ }
+
Datum
gtrgm_compress(PG_FUNCTION_ARGS)
{