after-triggers.patch
application/octet-stream
Filename: after-triggers.patch
Type: application/octet-stream
Part: 0
Message:
Compressing the AFTER TRIGGER queue
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
new file mode 100644
index 4c31f19..4799dc2
*** a/src/backend/commands/trigger.c
--- b/src/backend/commands/trigger.c
*************** typedef SetConstraintStateData *SetConst
*** 2851,2864 ****
/*
* Per-trigger-event data
*
! * The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS
! * status bits and one or two tuple CTIDs. Each event record also has an
! * associated AfterTriggerSharedData that is shared across all instances
! * of similar events within a "chunk".
! *
! * We arrange not to waste storage on ate_ctid2 for non-update events.
! * We could go further and not store either ctid for statement-level triggers,
! * but that seems unlikely to be worth the trouble.
*
* Note: ats_firing_id is initially zero and is set to something else when
* AFTER_TRIGGER_IN_PROGRESS is set. It indicates which trigger firing
--- 2851,2864 ----
/*
* Per-trigger-event data
*
! * The actual per-event data includes DONE/IN_PROGRESS status bits, an
! * encoded CTID and a reference to an associated AfterTriggerSharedData that
! * is shared across all instances of similar events within a "chunk". The
! * encoding uses between 1 and 9 bytes per event, and is optimised so that
! * the smallest amount of space is used for the types of event that are most
! * likely to occur often - events where the CTID is the same or close to the
! * last event's CTID. This happens when there are multiple triggers on a
! * table, or the table is traversed using a sequential or bitmap heap scan.
*
* Note: ats_firing_id is initially zero and is set to something else when
* AFTER_TRIGGER_IN_PROGRESS is set. It indicates which trigger firing
*************** typedef SetConstraintStateData *SetConst
*** 2869,2881 ****
* cycles. So we need only ensure that ats_firing_id is zero when attaching
* a new event to an existing AfterTriggerSharedData record.
*/
- typedef uint32 TriggerFlags;
! #define AFTER_TRIGGER_OFFSET 0x0FFFFFFF /* must be low-order
! * bits */
! #define AFTER_TRIGGER_2CTIDS 0x10000000
! #define AFTER_TRIGGER_DONE 0x20000000
! #define AFTER_TRIGGER_IN_PROGRESS 0x40000000
typedef struct AfterTriggerSharedData *AfterTriggerShared;
--- 2869,2900 ----
* cycles. So we need only ensure that ats_firing_id is zero when attaching
* a new event to an existing AfterTriggerSharedData record.
*/
! /*
! * The first (and possibly the only) byte of an event record contains flags
! * that determine how the rest of the record is encoded.
! *
! * AFTER_TRIGGER_SHARED_DATA_FLAGS (3 bits) determines how the index to the
! * AfterTriggerSharedData is encoded:
! * 0-5 => Index of AfterTriggerSharedData (counting back from the end of
! * the chunk). If the CTID is the same as the previous event, the
! * value is instead (index - previous event's index - 1).
! * 6 => Index is encoded in 1 additional byte.
! * 7 => Index is encoded in 2 additional bytes.
! *
! * AFTER_TRIGGER_CTID_FLAGS (3 bits) determines how the CTID is encoded:
! * 0 => CTID is same as previous event.
! * 1-6 => Difference between this event's CTID and previous CTID encoded
! * using 1-6 bytes.
! * 7 => CTID is offset by 1 relative to previous event.
! *
! * The flag byte is then followed by 0-2 bytes for the index to the shared
! * data and 0-6 bytes for the CTID.
! */
! #define AFTER_TRIGGER_DONE 0x80
! #define AFTER_TRIGGER_IN_PROGRESS 0x40
! #define AFTER_TRIGGER_SHARED_DATA_FLAGS 0x38
! #define AFTER_TRIGGER_CTID_FLAGS 0x07
typedef struct AfterTriggerSharedData *AfterTriggerShared;
*************** typedef struct AfterTriggerEventData *Af
*** 2891,2914 ****
typedef struct AfterTriggerEventData
{
! TriggerFlags ate_flags; /* status bits and offset to shared data */
! ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
! ItemPointerData ate_ctid2; /* new updated tuple */
} AfterTriggerEventData;
! /* This struct must exactly match the one above except for not having ctid2 */
! typedef struct AfterTriggerEventDataOneCtid
! {
! TriggerFlags ate_flags; /* status bits and offset to shared data */
! ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
! } AfterTriggerEventDataOneCtid;
! #define SizeofTriggerEvent(evt) \
! (((evt)->ate_flags & AFTER_TRIGGER_2CTIDS) ? \
! sizeof(AfterTriggerEventData) : sizeof(AfterTriggerEventDataOneCtid))
! #define GetTriggerSharedData(evt) \
! ((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET)))
/*
* To avoid palloc overhead, we keep trigger events in arrays in successively-
--- 2910,2932 ----
typedef struct AfterTriggerEventData
{
! char ate_flags; /* DONE/IN_PROGRESS bits, 3 shared-data-index bits, 3 CTID bits */
! char ate_data[8]; /* 0-8 bytes used: 0-2 index bytes, then 0-6 CTID bytes */
} AfterTriggerEventData;
! /* Shared data index: flag values 0-5 need no extra bytes, 6 => 1 byte, 7 => 2 */
! #define SharedDataFlags(evt) \
! (((evt)->ate_flags & AFTER_TRIGGER_SHARED_DATA_FLAGS) >> 3)
! #define SizeofSharedDataIndex(evt) \
! (SharedDataFlags(evt) < 6 ? 0 : (SharedDataFlags(evt) - 5))
! /* CTID: flag values 0 and 7 need no extra bytes, 1-6 give the byte count */
! #define CtidFlags(evt) ((evt)->ate_flags & AFTER_TRIGGER_CTID_FLAGS)
! #define SizeofCtid(evt) (CtidFlags(evt) == 7 ? 0 : CtidFlags(evt))
! /* Size of an encoded event: flag byte + index bytes + CTID bytes (1-9) */
! #define SizeofTriggerEvent(evt) \
! (1 + SizeofSharedDataIndex(evt) + SizeofCtid(evt))
/*
* To avoid palloc overhead, we keep trigger events in arrays in successively-
*************** typedef struct AfterTriggerEventList
*** 2934,2939 ****
--- 2952,2959 ----
AfterTriggerEventChunk *head;
AfterTriggerEventChunk *tail;
char *tailfree; /* freeptr of tail chunk */
+ uint16 last_ats_idx; /* Shared data index of last event */
+ ItemPointerData last_ctid; /* CTID of last event */
} AfterTriggerEventList;
/* Macros to help in iterating over a list of events */
*************** typedef struct AfterTriggerEventList
*** 2947,2952 ****
--- 2967,2975 ----
#define for_each_event_chunk(eptr, cptr, evtlist) \
for_each_chunk(cptr, evtlist) for_each_event(eptr, cptr)
+ #define AfterTriggerSharedDataIdx(chunk, evtsharedptr) \
+ ((uint16) (((AfterTriggerShared) (chunk)->endptr) - (evtsharedptr)) - 1) /* 0 = record nearest the chunk end */
+
/*
* All per-transaction data for the AFTER TRIGGERS module.
*************** typedef AfterTriggersData *AfterTriggers
*** 3024,3034 ****
static AfterTriggers afterTriggers;
! static void AfterTriggerExecute(AfterTriggerEvent event,
! Relation rel, TriggerDesc *trigdesc,
! FmgrInfo *finfo,
! Instrumentation *instr,
! MemoryContext per_tuple_context);
static SetConstraintState SetConstraintStateCreate(int numalloc);
static SetConstraintState SetConstraintStateCopy(SetConstraintState state);
static SetConstraintState SetConstraintStateAddItem(SetConstraintState state,
--- 3047,3056 ----
static AfterTriggers afterTriggers;
! static void AfterTriggerExecute(Relation rel, TriggerDesc *trigdesc,
! FmgrInfo *finfo, Instrumentation *instr,
! MemoryContext per_tuple_context,
! TriggerEvent event, Oid tgoid, ItemPointer ctid);
static SetConstraintState SetConstraintStateCreate(int numalloc);
static SetConstraintState SetConstraintStateCopy(SetConstraintState state);
static SetConstraintState SetConstraintStateAddItem(SetConstraintState state,
*************** static SetConstraintState SetConstraintS
*** 3036,3041 ****
--- 3058,3304 ----
/* ----------
+ * encodeAfterTriggerEvent()
+ *
+ * Encode a trigger event for storage in the AfterTriggerEventList. This
+ * aims to store the most common events using a single byte, but may take
+ * up to 9 bytes in some cases.
+ *
+ * Returns the number of bytes used to encode the event.
+ * ----------
+ */
+ static inline int
+ encodeAfterTriggerEvent(AfterTriggerEvent evt, /* output: receives the encoded bytes */
+ bool row_trigger, /* false => no CTID is ever stored */
+ uint16 ats_idx, /* shared data index of this event */
+ ItemPointer ctid, /* CTID of this event (not read unless row_trigger) */
+ uint16 prev_ats_idx, /* shared data index of the previous event */
+ ItemPointer prev_ctid) /* CTID of the previous event */
+ {
+ bool store_ctid = row_trigger && !ItemPointerEquals(ctid, prev_ctid);
+ int ate_data_idx = 0; /* number of bytes written to ate_data so far */
+ char ats_flags;
+ char ctid_flags;
+
+ /*
+ * Work out how to store the shared data index compactly. Try to code for
+ * the most common cases first, and have them use the minimum space.
+ *
+ * If the CTID isn't changing, then a common case is that the shared data
+ * index is increasing by a small amount (next trigger on the same row).
+ * An increase of 1-6 is stored, less 1, in the 3 flag bits as 0-5. The
+ * offset by 1 works because an unchanged index never takes this path;
+ * it falls through to the explicit 1- or 2-byte forms below.
+ */
+ if (!store_ctid && ats_idx > prev_ats_idx && ats_idx < prev_ats_idx + 7)
+ ats_flags = (char) (ats_idx - prev_ats_idx - 1); /* 0-5 */
+
+ /*
+ * Else if the CTID is changing, the shared data index will often reset
+ * back to a small value. If this is less than 6 it will fit in the flag
+ * byte, stored as an absolute index.
+ */
+ else if (store_ctid && ats_idx < 6)
+ ats_flags = (char) ats_idx; /* 0-5 */
+
+ /*
+ * Else indexes less than 256 require one additional byte. The chunk
+ * size adapts so that typically not too many shared records are used
+ * per chunk (typically less than 200) but we can't rely on that.
+ */
+ else if (ats_idx < 256)
+ {
+ ats_flags = 6;
+ evt->ate_data[ate_data_idx++] = (char) ats_idx;
+ }
+
+ /*
+ * Otherwise indexes larger than 255 require 2 additional bytes (MSB first).
+ */
+ else
+ {
+ ats_flags = 7;
+ evt->ate_data[ate_data_idx++] = (char) (ats_idx >> 8);
+ evt->ate_data[ate_data_idx++] = (char) (ats_idx & 0xFF);
+ }
+
+ if (store_ctid)
+ {
+ /*
+ * Store the CTID compactly. In this block we know that the CTID has
+ * changed, but hopefully only by a small amount.
+ *
+ * First test for the common case where the offset number has increased
+ * by one within the same block. A step into the next block is not
+ * treated as "plus one"; it uses the general XOR encoding below.
+ */
+ BlockNumber blk1 = ItemPointerGetBlockNumber(prev_ctid);
+ BlockNumber blk2 = ItemPointerGetBlockNumber(ctid);
+ OffsetNumber off1 = ItemPointerGetOffsetNumber(prev_ctid);
+ OffsetNumber off2 = ItemPointerGetOffsetNumber(ctid);
+
+ if (blk2 == blk1 && off2 == off1 + 1)
+ {
+ ctid_flags = 7; /* no CTID bytes are written for this form */
+ }
+ else
+ {
+ /*
+ * XOR the 2 CTIDs together and store the result (MSB first)
+ * discarding any leading zero bytes. This reduces the space
+ * needed for nearby CTIDs, for which the high bytes will be
+ * equal and XOR to zero.
+ */
+ BlockNumber blk_xor = blk1 ^ blk2;
+ OffsetNumber off_xor = off1 ^ off2;
+
+ /* Number of bytes needed for the encoded CTID (1-6): 0-4 block bytes + 1-2 offset bytes */
+ ctid_flags = blk_xor == 0 ? ((off_xor & 0xFF00) != 0 ? 2 : 1) :
+ (blk_xor & 0xFF000000) != 0 ? 6 :
+ (blk_xor & 0xFF0000) != 0 ? 5 :
+ (blk_xor & 0xFF00) != 0 ? 4 : 3;
+
+ switch (ctid_flags)
+ {
+ case 6:
+ evt->ate_data[ate_data_idx++] = (char) (blk_xor >> 24);
+ /* fall through to next case */
+ case 5:
+ evt->ate_data[ate_data_idx++] = (char) (blk_xor >> 16);
+ /* fall through to next case */
+ case 4:
+ evt->ate_data[ate_data_idx++] = (char) (blk_xor >> 8);
+ /* fall through to next case */
+ case 3:
+ evt->ate_data[ate_data_idx++] = (char) blk_xor;
+ /* fall through to next case */
+ case 2:
+ evt->ate_data[ate_data_idx++] = (char) (off_xor >> 8);
+ /* fall through to next case */
+ case 1:
+ evt->ate_data[ate_data_idx++] = (char) off_xor;
+ break;
+ default:
+ /* can't happen */
+ break;
+ }
+ }
+ }
+ else
+ ctid_flags = 0; /* No CTID needed */
+
+ evt->ate_flags = (ats_flags << 3) | ctid_flags; /* DONE/IN_PROGRESS bits start out clear */
+
+ return ate_data_idx + 1; /* Total size of encoded event */
+ }
+
+ /* ----------
+ * GetTriggerSharedData()
+ * Returns a pointer to a trigger event's shared data, which is found by
+ * decoding the event's index, counting back from the end of "chunk".
+ * ----------
+ */
+ static inline AfterTriggerShared
+ GetTriggerSharedData(AfterTriggerEvent evt, AfterTriggerEventChunk *chunk,
+ uint16 prev_ats_idx)
+ {
+ int shared_data_flags = SharedDataFlags(evt);
+ uint16 ats_idx;
+
+ if (shared_data_flags < 6)
+ {
+ if (CtidFlags(evt) == 0)
+ /* Same CTID as previous event: value is relative to prev_ats_idx */
+ ats_idx = prev_ats_idx + (uint16) shared_data_flags + 1;
+ else
+ /* First trigger on a new row, with index < 6 */
+ ats_idx = (uint16) shared_data_flags;
+ }
+ else if (shared_data_flags == 6)
+ {
+ /* 1-byte index follows flag byte; read as unsigned so >= 0x80 is not sign-extended */
+ ats_idx = (uint16) (unsigned char) evt->ate_data[0];
+ }
+ else /* shared_data_flags == 7 */
+ {
+ /* 2-byte index (MSB first) follows flag byte; read both bytes as unsigned */
+ ats_idx = (((uint16) (unsigned char) evt->ate_data[0]) << 8) | (unsigned char) evt->ate_data[1];
+ }
+ return ((AfterTriggerShared) chunk->endptr) - ats_idx - 1;
+ }
+
+ /* ----------
+ * GetTriggerCtid()
+ * Decode the CTID of an AfterTriggerEvent into *ctid. Encodings are
+ * relative, so the CTID of the preceding event is needed as prev_ctid.
+ * ----------
+ */
+ static inline void
+ GetTriggerCtid(AfterTriggerEvent evt, ItemPointer prev_ctid, ItemPointer ctid)
+ {
+ if (CtidFlags(evt) == 0)
+ {
+ /* CTID is same as that of previous event */
+ ItemPointerCopy(prev_ctid, ctid);
+ }
+ else if (CtidFlags(evt) == 7)
+ {
+ /*
+ * Same block as the previous CTID, with the offset number one higher.
+ * The encoder never uses this form across a block boundary.
+ */
+ ItemPointerSet(ctid, ItemPointerGetBlockNumber(prev_ctid),
+ ItemPointerGetOffsetNumber(prev_ctid) + 1);
+ }
+ else
+ {
+ /*
+ * Values in the range 1-6 indicate an encoded difference from the
+ * previous CTID. This difference is computed by XORing the two CTIDs
+ * and storing the result (MSB first) after discarding any leading
+ * zero bytes. To reproduce the original CTID, we just XOR the
+ * difference bytes again.
+ */
+ unsigned char *ctid_diffs = ((unsigned char *)evt) + 1 +
+ SizeofSharedDataIndex(evt); /* skip the flag byte and any index bytes */
+ BlockNumber blkno = ItemPointerGetBlockNumber(prev_ctid);
+ OffsetNumber offno = ItemPointerGetOffsetNumber(prev_ctid);
+
+ switch (CtidFlags(evt))
+ {
+ case 6:
+ blkno ^= ((BlockNumber) *ctid_diffs) << 24;
+ ctid_diffs++;
+ /* fall through to next case */
+ case 5:
+ blkno ^= ((BlockNumber) *ctid_diffs) << 16;
+ ctid_diffs++;
+ /* fall through to next case */
+ case 4:
+ blkno ^= ((BlockNumber) *ctid_diffs) << 8;
+ ctid_diffs++;
+ /* fall through to next case */
+ case 3:
+ blkno ^= (BlockNumber) *ctid_diffs;
+ ctid_diffs++;
+ /* fall through to next case */
+ case 2:
+ offno ^= ((OffsetNumber) *ctid_diffs) << 8;
+ ctid_diffs++;
+ /* fall through to next case */
+ case 1:
+ offno ^= (OffsetNumber) *ctid_diffs;
+ break;
+ default:
+ /* can't happen */
+ break;
+ }
+ ItemPointerSet(ctid, blkno, offno);
+ }
+ }
+
+
+ /* ----------
* afterTriggerCheckState()
*
* Returns true if the trigger event is actually in state DEFERRED.
*************** afterTriggerCheckState(AfterTriggerShare
*** 3085,3102 ****
* ----------
*/
static void
! afterTriggerAddEvent(AfterTriggerEventList *events,
! AfterTriggerEvent event, AfterTriggerShared evtshared)
{
! Size eventsize = SizeofTriggerEvent(event);
Size needed = eventsize + sizeof(AfterTriggerSharedData);
AfterTriggerEventChunk *chunk;
AfterTriggerShared newshared;
! AfterTriggerEvent newevent;
/*
* If empty list or not enough room in the tail chunk, make a new chunk.
! * We assume here that a new shared record will always be needed.
*/
chunk = events->tail;
if (chunk == NULL ||
--- 3348,3368 ----
* ----------
*/
static void
! afterTriggerAddEvent(AfterTriggerEventList *events, ItemPointer ctid,
! AfterTriggerShared evtshared)
{
! bool row_trigger = (evtshared->ats_event & TRIGGER_EVENT_ROW) != 0;
! Size eventsize = sizeof(AfterTriggerEventData); /* worst case */
Size needed = eventsize + sizeof(AfterTriggerSharedData);
AfterTriggerEventChunk *chunk;
AfterTriggerShared newshared;
! AfterTriggerEventData newevent;
! uint16 ats_idx;
/*
* If empty list or not enough room in the tail chunk, make a new chunk.
! * We assume here a worst case event record size and that a new shared
! * record will always be needed.
*/
chunk = events->tail;
if (chunk == NULL ||
*************** afterTriggerAddEvent(AfterTriggerEventLi
*** 3115,3138 ****
/*
* Chunk size starts at 1KB and is allowed to increase up to 1MB.
! * These numbers are fairly arbitrary, though there is a hard limit at
! * AFTER_TRIGGER_OFFSET; else we couldn't link event records to their
! * shared records using the available space in ate_flags. Another
! * constraint is that if the chunk size gets too huge, the search loop
! * below would get slow given a (not too common) usage pattern with
! * many distinct event types in a chunk. Therefore, we double the
! * preceding chunk size only if there weren't too many shared records
! * in the preceding chunk; otherwise we halve it. This gives us some
! * ability to adapt to the actual usage pattern of the current query
! * while still having large chunk sizes in typical usage. All chunk
! * sizes used should be MAXALIGN multiples, to ensure that the shared
! * records will be aligned safely.
*/
#define MIN_CHUNK_SIZE 1024
#define MAX_CHUNK_SIZE (1024*1024)
! #if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
! #error MAX_CHUNK_SIZE must not exceed AFTER_TRIGGER_OFFSET
#endif
if (chunk == NULL)
--- 3381,3407 ----
/*
* Chunk size starts at 1KB and is allowed to increase up to 1MB.
! * These numbers are fairly arbitrary, though there is a hard limit
! * set by the fact that we use 16-bit indexes into the shared data
! * records at the end of the chunk. Another constraint is that if the
! * chunk size gets too huge, the search loop below would get slow
! * given a (not too common) usage pattern with many distinct event
! * types in a chunk. This would also affect the per-event compression
! * used. Therefore, we double the preceding chunk size only if there
! * weren't too many shared records in the preceding chunk; otherwise
! * we halve it. This gives us some ability to adapt to the actual
! * usage pattern of the current query while still having large chunk
! * sizes in typical usage. All chunk sizes used should be MAXALIGN
! * multiples, to ensure that the shared records will be aligned
! * safely.
*/
#define MIN_CHUNK_SIZE 1024
#define MAX_CHUNK_SIZE (1024*1024)
+ #define SHARED_DATA_SIZE 16
+ #define MAX_SHARED_DATA_COUNT (256*256)
! #if MAX_CHUNK_SIZE > (SHARED_DATA_SIZE * MAX_SHARED_DATA_COUNT)
! #error MAX_CHUNK_SIZE must not exceed SHARED_DATA_SIZE * MAX_SHARED_DATA_COUNT
#endif
if (chunk == NULL)
*************** afterTriggerAddEvent(AfterTriggerEventLi
*** 3183,3195 ****
newshared->ats_firing_id = 0; /* just to be sure */
chunk->endfree = (char *) newshared;
}
/* Insert the data */
! newevent = (AfterTriggerEvent) chunk->freeptr;
! memcpy(newevent, event, eventsize);
! /* ... and link the new event to its shared record */
! newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
! newevent->ate_flags |= (char *) newshared - (char *) newevent;
chunk->freeptr += eventsize;
events->tailfree = chunk->freeptr;
--- 3452,3467 ----
newshared->ats_firing_id = 0; /* just to be sure */
chunk->endfree = (char *) newshared;
}
+ ats_idx = AfterTriggerSharedDataIdx(chunk, newshared);
/* Insert the data */
! eventsize = encodeAfterTriggerEvent(&newevent, row_trigger, ats_idx,
! ctid, events->last_ats_idx,
! &events->last_ctid);
! memcpy(chunk->freeptr, &newevent, eventsize);
! events->last_ats_idx = ats_idx;
! if (row_trigger)
! ItemPointerCopy(ctid, &events->last_ctid);
chunk->freeptr += eventsize;
events->tailfree = chunk->freeptr;
*************** afterTriggerFreeEventList(AfterTriggerEv
*** 3215,3220 ****
--- 3487,3494 ----
events->head = NULL;
events->tail = NULL;
events->tailfree = NULL;
+ events->last_ats_idx = 0;
+ ItemPointerSet(&events->last_ctid, 0, 1); /* first valid CTID */
}
/* ----------
*************** afterTriggerRestoreEventList(AfterTrigge
*** 3268,3290 ****
* fmgr lookup cache space at the caller level. (For triggers fired at
* the end of a query, we can even piggyback on the executor's state.)
*
- * event: event currently being fired.
* rel: open relation for event.
* trigdesc: working copy of rel's trigger info.
* finfo: array of fmgr lookup cache entries (one per trigger in trigdesc).
* instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger),
* or NULL if no instrumentation is wanted.
* per_tuple_context: memory context to call trigger function in.
* ----------
*/
static void
! AfterTriggerExecute(AfterTriggerEvent event,
! Relation rel, TriggerDesc *trigdesc,
FmgrInfo *finfo, Instrumentation *instr,
! MemoryContext per_tuple_context)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event);
! Oid tgoid = evtshared->ats_tgoid;
TriggerData LocTriggerData;
HeapTupleData tuple1;
HeapTupleData tuple2;
--- 3542,3565 ----
* fmgr lookup cache space at the caller level. (For triggers fired at
* the end of a query, we can even piggyback on the executor's state.)
*
* rel: open relation for event.
* trigdesc: working copy of rel's trigger info.
* finfo: array of fmgr lookup cache entries (one per trigger in trigdesc).
* instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger),
* or NULL if no instrumentation is wanted.
* per_tuple_context: memory context to call trigger function in.
+ * event: type of event being fired.
+ * tgoid: trigger OID of event being fired.
+ * ctid: CTID for event (ignored for statement-level events).
* ----------
*/
static void
! AfterTriggerExecute(Relation rel, TriggerDesc *trigdesc,
FmgrInfo *finfo, Instrumentation *instr,
! MemoryContext per_tuple_context,
! TriggerEvent event, Oid tgoid, ItemPointer ctid)
{
! ItemPointer new_ctid = NULL;
TriggerData LocTriggerData;
HeapTupleData tuple1;
HeapTupleData tuple2;
*************** AfterTriggerExecute(AfterTriggerEvent ev
*** 3318,3330 ****
/*
* Fetch the required tuple(s).
*/
! if (ItemPointerIsValid(&(event->ate_ctid1)))
{
! ItemPointerCopy(&(event->ate_ctid1), &(tuple1.t_self));
if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
LocTriggerData.tg_trigtuple = &tuple1;
LocTriggerData.tg_trigtuplebuf = buffer1;
}
else
{
--- 3593,3609 ----
/*
* Fetch the required tuple(s).
*/
! if ((event & TRIGGER_EVENT_ROW) != 0)
{
! ItemPointerCopy(ctid, &(tuple1.t_self));
if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
LocTriggerData.tg_trigtuple = &tuple1;
LocTriggerData.tg_trigtuplebuf = buffer1;
+
+ /* for an UPDATE the old tuple points to the new tuple */
+ if (TRIGGER_FIRED_BY_UPDATE(event))
+ new_ctid = &(tuple1.t_data->t_ctid);
}
else
{
*************** AfterTriggerExecute(AfterTriggerEvent ev
*** 3332,3342 ****
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
}
! /* don't touch ctid2 if not there */
! if ((event->ate_flags & AFTER_TRIGGER_2CTIDS) &&
! ItemPointerIsValid(&(event->ate_ctid2)))
{
! ItemPointerCopy(&(event->ate_ctid2), &(tuple2.t_self));
if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
LocTriggerData.tg_newtuple = &tuple2;
--- 3611,3619 ----
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
}
! if (ItemPointerIsValid(new_ctid))
{
! ItemPointerCopy(new_ctid, &(tuple2.t_self));
if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
LocTriggerData.tg_newtuple = &tuple2;
*************** AfterTriggerExecute(AfterTriggerEvent ev
*** 3353,3359 ****
*/
LocTriggerData.type = T_TriggerData;
LocTriggerData.tg_event =
! evtshared->ats_event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW);
LocTriggerData.tg_relation = rel;
MemoryContextReset(per_tuple_context);
--- 3630,3636 ----
*/
LocTriggerData.type = T_TriggerData;
LocTriggerData.tg_event =
! event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW);
LocTriggerData.tg_relation = rel;
MemoryContextReset(per_tuple_context);
*************** afterTriggerMarkEvents(AfterTriggerEvent
*** 3409,3420 ****
bool found = false;
AfterTriggerEvent event;
AfterTriggerEventChunk *chunk;
for_each_event_chunk(event, chunk, *events)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event);
bool defer_it = false;
if (!(event->ate_flags &
(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)))
{
--- 3686,3705 ----
bool found = false;
AfterTriggerEvent event;
AfterTriggerEventChunk *chunk;
+ uint16 prev_ats_idx = 0;
+ ItemPointerData prev_ctid;
+
+ ItemPointerSet(&prev_ctid, 0, 1); /* first valid CTID */
for_each_event_chunk(event, chunk, *events)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk,
! prev_ats_idx);
! ItemPointerData ctid;
bool defer_it = false;
+ GetTriggerCtid(event, &prev_ctid, &ctid);
+
if (!(event->ate_flags &
(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)))
{
*************** afterTriggerMarkEvents(AfterTriggerEvent
*** 3443,3452 ****
if (defer_it && move_list != NULL)
{
/* add it to move_list */
! afterTriggerAddEvent(move_list, event, evtshared);
/* mark original copy "done" so we don't do it again */
event->ate_flags |= AFTER_TRIGGER_DONE;
}
}
return found;
--- 3728,3740 ----
if (defer_it && move_list != NULL)
{
/* add it to move_list */
! afterTriggerAddEvent(move_list, &ctid, evtshared);
/* mark original copy "done" so we don't do it again */
event->ate_flags |= AFTER_TRIGGER_DONE;
}
+
+ prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared);
+ ItemPointerCopy(&ctid, &prev_ctid);
}
return found;
*************** afterTriggerInvokeEvents(AfterTriggerEve
*** 3487,3492 ****
--- 3775,3784 ----
TriggerDesc *trigdesc = NULL;
FmgrInfo *finfo = NULL;
Instrumentation *instr = NULL;
+ uint16 prev_ats_idx = 0;
+ ItemPointerData prev_ctid;
+
+ ItemPointerSet(&prev_ctid, 0, 1); /* first valid CTID */
/* Make a local EState if need be */
if (estate == NULL)
*************** afterTriggerInvokeEvents(AfterTriggerEve
*** 3510,3516 ****
for_each_event(event, chunk)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event);
/*
* Is it one for me to fire?
--- 3802,3812 ----
for_each_event(event, chunk)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk,
! prev_ats_idx);
! ItemPointerData ctid;
!
! GetTriggerCtid(event, &prev_ctid, &ctid);
/*
* Is it one for me to fire?
*************** afterTriggerInvokeEvents(AfterTriggerEve
*** 3541,3548 ****
* still set, so recursive examinations of the event list
* won't try to re-fire it.
*/
! AfterTriggerExecute(event, rel, trigdesc, finfo, instr,
! per_tuple_context);
/*
* Mark the event as done.
--- 3837,3845 ----
* still set, so recursive examinations of the event list
* won't try to re-fire it.
*/
! AfterTriggerExecute(rel, trigdesc, finfo, instr,
! per_tuple_context, evtshared->ats_event,
! evtshared->ats_tgoid, &ctid);
/*
* Mark the event as done.
*************** afterTriggerInvokeEvents(AfterTriggerEve
*** 3555,3560 ****
--- 3852,3860 ----
/* something remains to be done */
all_fired = all_fired_in_chunk = false;
}
+
+ prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared);
+ ItemPointerCopy(&ctid, &prev_ctid);
}
/* Clear the chunk if delete_ok and nothing left of interest */
*************** AfterTriggerBeginXact(void)
*** 3620,3625 ****
--- 3920,3927 ----
afterTriggers->events.head = NULL;
afterTriggers->events.tail = NULL;
afterTriggers->events.tailfree = NULL;
+ afterTriggers->events.last_ats_idx = 0;
+ ItemPointerSet(&afterTriggers->events.last_ctid, 0, 1); /* first valid CTID */
afterTriggers->query_depth = -1;
/* We initialize the query stack to a reasonable size */
*************** AfterTriggerBeginQuery(void)
*** 3679,3684 ****
--- 3981,3988 ----
events->head = NULL;
events->tail = NULL;
events->tailfree = NULL;
+ events->last_ats_idx = 0;
+ ItemPointerSet(&events->last_ctid, 0, 1); /* first valid CTID */
}
*************** AfterTriggerEndSubXact(bool isCommit)
*** 3921,3926 ****
--- 4225,4231 ----
AfterTriggerEvent event;
AfterTriggerEventChunk *chunk;
CommandId subxact_firing_id;
+ uint16 prev_ats_idx = 0;
/*
* Ignore call if the transaction is in aborted state. (Probably
*************** AfterTriggerEndSubXact(bool isCommit)
*** 3997,4003 ****
subxact_firing_id = afterTriggers->firing_stack[my_level];
for_each_event_chunk(event, chunk, afterTriggers->events)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event);
if (event->ate_flags &
(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))
--- 4302,4309 ----
subxact_firing_id = afterTriggers->firing_stack[my_level];
for_each_event_chunk(event, chunk, afterTriggers->events)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk,
! prev_ats_idx);
if (event->ate_flags &
(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))
*************** AfterTriggerEndSubXact(bool isCommit)
*** 4006,4011 ****
--- 4312,4318 ----
event->ate_flags &=
~(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS);
}
+ prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared);
}
}
}
*************** AfterTriggerPendingOnRel(Oid relid)
*** 4389,4394 ****
--- 4696,4702 ----
AfterTriggerEvent event;
AfterTriggerEventChunk *chunk;
int depth;
+ uint16 prev_ats_idx = 0;
/* No-op if we aren't in a transaction. (Shouldn't happen?) */
if (afterTriggers == NULL)
*************** AfterTriggerPendingOnRel(Oid relid)
*** 4397,4403 ****
/* Scan queued events */
for_each_event_chunk(event, chunk, afterTriggers->events)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event);
/*
* We can ignore completed events. (Even if a DONE flag is rolled
--- 4705,4712 ----
/* Scan queued events */
for_each_event_chunk(event, chunk, afterTriggers->events)
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk,
! prev_ats_idx);
/*
* We can ignore completed events. (Even if a DONE flag is rolled
*************** AfterTriggerPendingOnRel(Oid relid)
*** 4409,4414 ****
--- 4718,4725 ----
if (evtshared->ats_relid == relid)
return true;
+
+ prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared);
}
/*
*************** AfterTriggerPendingOnRel(Oid relid)
*** 4418,4432 ****
*/
for (depth = 0; depth <= afterTriggers->query_depth; depth++)
{
for_each_event_chunk(event, chunk, afterTriggers->query_stack[depth])
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event);
if (event->ate_flags & AFTER_TRIGGER_DONE)
continue;
if (evtshared->ats_relid == relid)
return true;
}
}
--- 4729,4747 ----
*/
for (depth = 0; depth <= afterTriggers->query_depth; depth++)
{
+ prev_ats_idx = 0;
for_each_event_chunk(event, chunk, afterTriggers->query_stack[depth])
{
! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk,
! prev_ats_idx);
+ /* Record the index before the "continue" below: the next event may be encoded relative to it */
+ prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared);
if (event->ate_flags & AFTER_TRIGGER_DONE)
continue;
if (evtshared->ats_relid == relid)
return true;
}
}
*************** AfterTriggerSaveEvent(EState *estate, Re
*** 4453,4460 ****
{
Relation rel = relinfo->ri_RelationDesc;
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
- AfterTriggerEventData new_event;
AfterTriggerSharedData new_shared;
int tgtype_event;
int tgtype_level;
int i;
--- 4768,4775 ----
{
Relation rel = relinfo->ri_RelationDesc;
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
AfterTriggerSharedData new_shared;
+ ItemPointer ctid;
int tgtype_event;
int tgtype_level;
int i;
*************** AfterTriggerSaveEvent(EState *estate, Re
*** 4476,4482 ****
* validation is important to make sure we don't walk off the edge of our
* arrays.
*/
- new_event.ate_flags = 0;
switch (event)
{
case TRIGGER_EVENT_INSERT:
--- 4791,4796 ----
*************** AfterTriggerSaveEvent(EState *estate, Re
*** 4485,4499 ****
{
Assert(oldtup == NULL);
Assert(newtup != NULL);
! ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid1));
! ItemPointerSetInvalid(&(new_event.ate_ctid2));
}
else
{
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ItemPointerSetInvalid(&(new_event.ate_ctid1));
! ItemPointerSetInvalid(&(new_event.ate_ctid2));
}
break;
case TRIGGER_EVENT_DELETE:
--- 4799,4811 ----
{
Assert(oldtup == NULL);
Assert(newtup != NULL);
! ctid = &newtup->t_self;
}
else
{
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ctid = NULL;
}
break;
case TRIGGER_EVENT_DELETE:
*************** AfterTriggerSaveEvent(EState *estate, Re
*** 4502,4516 ****
{
Assert(oldtup != NULL);
Assert(newtup == NULL);
! ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
! ItemPointerSetInvalid(&(new_event.ate_ctid2));
}
else
{
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ItemPointerSetInvalid(&(new_event.ate_ctid1));
! ItemPointerSetInvalid(&(new_event.ate_ctid2));
}
break;
case TRIGGER_EVENT_UPDATE:
--- 4814,4826 ----
{
Assert(oldtup != NULL);
Assert(newtup == NULL);
! ctid = &oldtup->t_self;
}
else
{
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ctid = NULL;
}
break;
case TRIGGER_EVENT_UPDATE:
*************** AfterTriggerSaveEvent(EState *estate, Re
*** 4519,4546 ****
{
Assert(oldtup != NULL);
Assert(newtup != NULL);
! ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
! ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid2));
! new_event.ate_flags |= AFTER_TRIGGER_2CTIDS;
}
else
{
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ItemPointerSetInvalid(&(new_event.ate_ctid1));
! ItemPointerSetInvalid(&(new_event.ate_ctid2));
}
break;
case TRIGGER_EVENT_TRUNCATE:
tgtype_event = TRIGGER_TYPE_TRUNCATE;
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ItemPointerSetInvalid(&(new_event.ate_ctid1));
! ItemPointerSetInvalid(&(new_event.ate_ctid2));
break;
default:
elog(ERROR, "invalid after-trigger event code: %d", event);
tgtype_event = 0; /* keep compiler quiet */
break;
}
--- 4829,4853 ----
{
Assert(oldtup != NULL);
Assert(newtup != NULL);
! ctid = &oldtup->t_self;
}
else
{
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ctid = NULL;
}
break;
case TRIGGER_EVENT_TRUNCATE:
tgtype_event = TRIGGER_TYPE_TRUNCATE;
Assert(oldtup == NULL);
Assert(newtup == NULL);
! ctid = NULL;
break;
default:
elog(ERROR, "invalid after-trigger event code: %d", event);
tgtype_event = 0; /* keep compiler quiet */
+ ctid = NULL;
break;
}
*************** AfterTriggerSaveEvent(EState *estate, Re
*** 4627,4632 ****
new_shared.ats_firing_id = 0;
afterTriggerAddEvent(&afterTriggers->query_stack[afterTriggers->query_depth],
! &new_event, &new_shared);
}
}
--- 4934,4939 ----
new_shared.ats_firing_id = 0;
afterTriggerAddEvent(&afterTriggers->query_stack[afterTriggers->query_depth],
! ctid, &new_shared);
}
}