ssi-ddl-4.patch

text/plain
Filename: ssi-ddl-4.patch
Type: text/plain
Part: 0
Message: Re: SIREAD lock versus ACCESS EXCLUSIVE lock
Patch

Same data as JSON: `GET /api/v1/attachments/:id/patch` returns the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. See the API reference.
Format: context
File	+	−
src/backend/catalog/heap.c	9	0
src/backend/catalog/index.c	13	0
src/backend/commands/cluster.c	7	0
src/backend/commands/tablecmds.c	9	0
src/backend/storage/lmgr/predicate.c	337	4
src/include/storage/predicate.h	2	0
*** a/src/backend/catalog/heap.c
--- b/src/backend/catalog/heap.c
***************
*** 63,68 ****
--- 63,69 ----
  #include "parser/parse_relation.h"
  #include "storage/bufmgr.h"
  #include "storage/freespace.h"
+ #include "storage/predicate.h"
  #include "storage/smgr.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
***************
*** 1658,1663 **** heap_drop_with_catalog(Oid relid)
--- 1659,1672 ----
  	CheckTableNotInUse(rel, "DROP TABLE");
  
  	/*
+ 	 * This effectively deletes all rows in the table, and may be done in a
+ 	 * serializable transaction.  In that case we must record a rw-conflict in
+ 	 * to this transaction from each transaction holding a predicate lock on
+ 	 * the table.
+ 	 */
+ 	CheckTableForSerializableConflictIn(rel);
+ 
+ 	/*
  	 * Delete pg_foreign_table tuple first.
  	 */
  	if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
***************
*** 54,59 ****
--- 54,60 ----
  #include "parser/parser.h"
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
+ #include "storage/predicate.h"
  #include "storage/procarray.h"
  #include "storage/smgr.h"
  #include "utils/builtins.h"
***************
*** 1312,1317 **** index_drop(Oid indexId)
--- 1313,1324 ----
  	CheckTableNotInUse(userIndexRelation, "DROP INDEX");
  
  	/*
+ 	 * All predicate locks on the index are about to be made invalid.
+ 	 * Promote them to relation locks on the heap.
+ 	 */
+ 	TransferPredicateLocksToHeapRelation(userIndexRelation);
+ 
+ 	/*
  	 * Schedule physical removal of the files
  	 */
  	RelationDropStorage(userIndexRelation);
***************
*** 2799,2804 **** reindex_index(Oid indexId, bool skip_constraint_checks)
--- 2806,2817 ----
  	 */
  	CheckTableNotInUse(iRel, "REINDEX INDEX");
  
+ 	/*
+ 	 * All predicate locks on the index are about to be made invalid.
+ 	 * Promote them to relation locks on the heap.
+ 	 */
+ 	TransferPredicateLocksToHeapRelation(iRel);
+ 
  	PG_TRY();
  	{
  		/* Suppress use of the target index while rebuilding it */
*** a/src/backend/commands/cluster.c
--- b/src/backend/commands/cluster.c
***************
*** 39,44 ****
--- 39,45 ----
  #include "optimizer/planner.h"
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
+ #include "storage/predicate.h"
  #include "storage/procarray.h"
  #include "storage/smgr.h"
  #include "utils/acl.h"
***************
*** 385,390 **** cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose,
--- 386,397 ----
  	if (OidIsValid(indexOid))
  		check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
  
+ 	/*
+ 	 * All predicate locks on the table and its indexes are about to be made
+ 	 * invalid.  Promote them to relation locks on the heap.
+ 	 */
+ 	TransferPredicateLocksToHeapRelation(OldHeap);
+ 
  	/* rebuild_relation does all the dirty work */
  	rebuild_relation(OldHeap, indexOid, freeze_min_age, freeze_table_age,
  					 verbose);
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 70,75 ****
--- 70,76 ----
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
  #include "storage/lock.h"
+ #include "storage/predicate.h"
  #include "storage/smgr.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
***************
*** 1040,1045 **** ExecuteTruncate(TruncateStmt *stmt)
--- 1041,1054 ----
  			Oid			toast_relid;
  
  			/*
+ 			 * This effectively deletes all rows in the table, and may be done
+ 			 * in a serializable transaction.  In that case we must record a
+ 			 * rw-conflict in to this transaction from each transaction
+ 			 * holding a predicate lock on the table.
+ 			 */
+ 			CheckTableForSerializableConflictIn(rel);
+ 
+ 			/*
  			 * Need the full transaction-safe pushups.
  			 *
  			 * Create a new empty storage file for the relation, and assign it
*** a/src/backend/storage/lmgr/predicate.c
--- b/src/backend/storage/lmgr/predicate.c
***************
*** 155,160 ****
--- 155,161 ----
   *							   BlockNumber newblkno);
   *		PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
   *								 BlockNumber newblkno);
+  *		TransferPredicateLocksToHeapRelation(const Relation relation)
   *		ReleasePredicateLocks(bool isCommit)
   *
   * conflict detection (may also trigger rollback)
***************
*** 162,167 ****
--- 163,169 ----
   *										HeapTupleData *tup, Buffer buffer)
   *		CheckForSerializableConflictIn(Relation relation, HeapTupleData *tup,
   *									   Buffer buffer)
+  *		CheckTableForSerializableConflictIn(const Relation relation)
   *
   * final rollback checking
   *		PreCommit_CheckForSerializationFailure(void)
***************
*** 257,266 ****
  #define SxactIsMarkedForDeath(sxact) (((sxact)->flags & SXACT_FLAG_MARKED_FOR_DEATH) != 0)
  
  /*
!  * When a public interface method is called for a split on an index relation,
!  * this is the test to see if we should do a quick return.
   */
! #define SkipSplitTracking(relation) \
  	(((relation)->rd_id < FirstBootstrapObjectId) \
  	|| RelationUsesLocalBuffers(relation))
  
--- 259,269 ----
  #define SxactIsMarkedForDeath(sxact) (((sxact)->flags & SXACT_FLAG_MARKED_FOR_DEATH) != 0)
  
  /*
!  * When a public interface method is called which needs to manipulate locks on
!  * a particular relation regardless of the lock holder, do a quick check to
!  * see if this relation can be skipped.
   */
! #define SkipPredicateLocksForRelation(relation) \
  	(((relation)->rd_id < FirstBootstrapObjectId) \
  	|| RelationUsesLocalBuffers(relation))
  
***************
*** 273,279 ****
  	((!IsolationIsSerializable()) \
  	|| ((MySerializableXact == InvalidSerializableXact)) \
  	|| ReleasePredicateLocksIfROSafe() \
! 	|| SkipSplitTracking(relation))
  
  
  /*
--- 276,282 ----
  	((!IsolationIsSerializable()) \
  	|| ((MySerializableXact == InvalidSerializableXact)) \
  	|| ReleasePredicateLocksIfROSafe() \
! 	|| SkipPredicateLocksForRelation(relation))
  
  
  /*
***************
*** 434,439 **** static bool TransferPredicateLocksToNewTarget(const PREDICATELOCKTARGETTAG oldta
--- 437,443 ----
  								  const PREDICATELOCKTARGETTAG newtargettag,
  								  bool removeOld);
  static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag);
+ static void DropAllPredicateLocksFromTableImpl(const Relation relation, bool transfer);
  static void SetNewSxactGlobalXmin(void);
  static bool ReleasePredicateLocksIfROSafe(void);
  static void ClearOldPredicateLocks(void);
***************
*** 2543,2548 **** exit:
--- 2547,2781 ----
  	return !outOfShmem;
  }
  
+ /*
+  * Drop all predicate locks of any granularity from the specified relation,
+  * which can be a heap relation or an index relation.  Optionally acquire a
+  * relation lock on the heap for any transactions with any lock(s) on the
+  * specified relation.
+  *
+  * This requires grabbing a lot of LW locks and scanning the entire lock
+  * target table for matches.  That makes this more expensive than most
+  * predicate lock management functions, but it will only be called for DDL
+  * type commands and there are fast returns when no serializable transactions
+  * are active or the relation is temporary.
+  *
+  * We are not using the TransferPredicateLocksToNewTarget function because
+  * it acquires its own locks on the partitions of the two targets involved,
+  * and we'll already be holding all partition locks.
+  *
+  * We can't throw an error from here, because the call could be from a
+  * transaction which is not serializable.
+  *
+  * NOTE: This is currently only called with transfer set to true, but that may
+  * change.	If we decide to clean up the locks from a table on commit of a
+  * transaction which executed DROP TABLE, the false condition will be useful.
+  */
+ static void
+ DropAllPredicateLocksFromTableImpl(const Relation relation, bool transfer)
+ {
+ 	HASH_SEQ_STATUS seqstat;
+ 	PREDICATELOCKTARGET *oldtarget;
+ 	PREDICATELOCKTARGET *heaptarget;
+ 	PREDICATELOCKTARGETTAG heaptargettag;
+ 	PREDICATELOCKTAG newpredlocktag;
+ 	Oid			dbId;
+ 	Oid			relId;
+ 	Oid			heapId;
+ 	int			i;
+ 	bool		isIndex;
+ 	bool		found;
+ 	uint32		reservedtargettaghash;
+ 	uint32		heaptargettaghash;
+ 
+ 	/*
+ 	 * Bail out quickly if there are no serializable transactions running.
+ 	 * It's safe to check this without taking locks because the caller is
+ 	 * holding an ACCESS EXCLUSIVE lock on the relation.  No new locks which
+ 	 * would matter here can be acquired while that is held.
+ 	 */
+ 	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+ 		return;
+ 
+ 	if (SkipPredicateLocksForRelation(relation))
+ 		return;
+ 
+ 	dbId = relation->rd_node.dbNode;
+ 	relId = relation->rd_id;
+ 	if (relation->rd_index == NULL)
+ 	{
+ 		isIndex = false;
+ 		heapId = relId;
+ 	}
+ 	else
+ 	{
+ 		isIndex = true;
+ 		heapId = relation->rd_index->indrelid;
+ 	}
+ 	Assert(heapId != InvalidOid);
+ 	Assert(transfer || !isIndex);		/* index OID only makes sense with
+ 										 * transfer */
+ 
+ 	SET_PREDICATELOCKTARGETTAG_RELATION(heaptargettag, dbId, heapId);
+ 	heaptargettaghash = PredicateLockTargetTagHashCode(&heaptargettag);
+ 	heaptarget = NULL;			/* Retrieve first time needed, then keep. */
+ 	/* Exclusively lock the lock list, every partition, and the xact hash. */
+ 	LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
+ 	for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
+ 		LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE);
+ 	LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
+ 
+ 	/*
+ 	 * Remove the reserved entry to give us scratch space, so we know we'll be
+ 	 * able to create the new lock target.
+ 	 */
+ 	reservedtargettaghash = 0;	/* quiet compiler warning */
+ 	if (transfer)
+ 	{
+ 		reservedtargettaghash = PredicateLockTargetTagHashCode(&ReservedTargetTag);
+ 		hash_search_with_hash_value(PredicateLockTargetHash,
+ 									&ReservedTargetTag,
+ 									reservedtargettaghash,
+ 									HASH_REMOVE, &found);
+ 		Assert(found);
+ 	}
+ 
+ 	/* Scan every entry in the predicate lock target hash table */
+ 	hash_seq_init(&seqstat, PredicateLockTargetHash);
+ 
+ 	while ((oldtarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
+ 	{
+ 		PREDICATELOCK *oldpredlock;
+ 
+ 		/*
+ 		 * Check whether this is a target which needs attention.
+ 		 */
+ 		if (GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag) != relId)
+ 			continue;			/* wrong relation id */
+ 		if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
+ 			continue;			/* wrong database id */
+ 		if (transfer && !isIndex
+ 			&& GET_PREDICATELOCKTARGETTAG_TYPE(oldtarget->tag) == PREDLOCKTAG_RELATION)
+ 			continue;			/* already the right lock */
+ 
+ 		/*
+ 		 * If we made it here, we have work to do.  When transferring, we make
+ 		 * sure the heap relation target exists, then we walk the list of
+ 		 * predicate locks for the old target we found, moving each lock to
+ 		 * the heap relation target -- unless its owner already holds that.
+ 		 */
+ 
+ 		/*
+ 		 * First make sure we have the heap relation target.  We only need to
+ 		 * do this once.
+ 		 */
+ 		if (transfer && heaptarget == NULL)
+ 		{
+ 			heaptarget = hash_search_with_hash_value(PredicateLockTargetHash,
+ 													 &heaptargettag,
+ 													 heaptargettaghash,
+ 													 HASH_ENTER, &found);
+ 			Assert(heaptarget != NULL);
+ 			if (!found)
+ 				SHMQueueInit(&heaptarget->predicateLocks);
+ 			newpredlocktag.myTarget = heaptarget;
+ 		}
+ 
+ 		/*
+ 		 * Loop through moving locks from this target to the relation target.
+ 		 */
+ 		oldpredlock = (PREDICATELOCK *)
+ 			SHMQueueNext(&(oldtarget->predicateLocks),
+ 						 &(oldtarget->predicateLocks),
+ 						 offsetof(PREDICATELOCK, targetLink));
+ 		while (oldpredlock)
+ 		{
+ 			PREDICATELOCK *nextpredlock;
+ 			PREDICATELOCK *newpredlock;
+ 			SerCommitSeqNo oldCommitSeqNo = oldpredlock->commitSeqNo;
+ 
+ 			nextpredlock = (PREDICATELOCK *)
+ 				SHMQueueNext(&(oldtarget->predicateLocks),
+ 							 &(oldpredlock->targetLink),
+ 							 offsetof(PREDICATELOCK, targetLink));
+ 			newpredlocktag.myXact = oldpredlock->tag.myXact;
+ 
+ 			/*
+ 			 * It's OK to remove the old lock first because of the ACCESS
+ 			 * EXCLUSIVE lock on the heap relation when this is called.  It is
+ 			 * desirable to do so because it avoids any chance of running out
+ 			 * of lock structure entries for the table.
+ 			 */
+ 			SHMQueueDelete(&(oldpredlock->xactLink));
+ 			/* oldtarget is removed below; no retail delete from its list. */
+ 			hash_search(PredicateLockHash,
+ 						&oldpredlock->tag,
+ 						HASH_REMOVE, &found);
+ 			Assert(found);
+ 
+ 			if (transfer)
+ 			{
+ 				newpredlock = (PREDICATELOCK *)
+ 					hash_search_with_hash_value
+ 					(PredicateLockHash,
+ 					 &newpredlocktag,
+ 					 PredicateLockHashCodeFromTargetHashCode(&newpredlocktag,
+ 														  heaptargettaghash),
+ 					 HASH_ENTER_NULL, &found);
+ 				Assert(newpredlock != NULL);
+ 				if (!found)
+ 				{
+ 					SHMQueueInsertBefore(&(heaptarget->predicateLocks),
+ 										 &(newpredlock->targetLink));
+ 					SHMQueueInsertBefore(&(newpredlocktag.myXact->predicateLocks),
+ 										 &(newpredlock->xactLink));
+ 					newpredlock->commitSeqNo = oldCommitSeqNo;
+ 				}
+ 				else
+ 				{
+ 					if (newpredlock->commitSeqNo < oldCommitSeqNo)
+ 						newpredlock->commitSeqNo = oldCommitSeqNo;
+ 				}
+ 
+ 				Assert(newpredlock->commitSeqNo != 0);
+ 				Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
+ 					   || (newpredlock->tag.myXact == OldCommittedSxact));
+ 			}
+ 
+ 			oldpredlock = nextpredlock;
+ 		}
+ 		/* Every lock on it is gone; remove the old target itself. */
+ 		hash_search(PredicateLockTargetHash, &oldtarget->tag, HASH_REMOVE, &found);
+ 		Assert(found);
+ 	}
+ 
+ 	if (transfer)
+ 	{
+ 		/* Put back the reserved entry removed above for scratch space */
+ 		hash_search_with_hash_value(PredicateLockTargetHash,
+ 									&ReservedTargetTag,
+ 									reservedtargettaghash,
+ 									HASH_ENTER, &found);
+ 		Assert(!found);
+ 	}
+ 
+ 	/* Release locks in reverse order of acquisition */
+ 	LWLockRelease(SerializableXactHashLock);
+ 	for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
+ 		LWLockRelease(FirstPredicateLockMgrLock + i);
+ 	LWLockRelease(SerializablePredicateLockListLock);
+ }
+ 
+ /*
+  * TransferPredicateLocksToHeapRelation
+  *		For all transactions, replace every predicate lock on the given heap
+  *		or index relation with a single relation lock on the owning heap.
+  */
+ void
+ TransferPredicateLocksToHeapRelation(const Relation relation)
+ {
+ 	DropAllPredicateLocksFromTableImpl(relation, true);
+ }
+ 
  
  /*
   *		PredicateLockPageSplit
***************
*** 2581,2587 **** PredicateLockPageSplit(const Relation relation, const BlockNumber oldblkno,
  	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
  		return;
  
! 	if (SkipSplitTracking(relation))
  		return;
  
  	Assert(oldblkno != newblkno);
--- 2814,2820 ----
  	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
  		return;
  
! 	if (SkipPredicateLocksForRelation(relation))
  		return;
  
  	Assert(oldblkno != newblkno);
***************
*** 3792,3797 **** CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple,
--- 4025,4129 ----
  }
  
  /*
+  * CheckTableForSerializableConflictIn
+  *		The entire table is going through a DDL-style logical mass delete
+  *		(like TRUNCATE TABLE or DROP TABLE).  While these operations do not
+  *		operate entirely within the bounds of snapshot isolation, they can
+  *		occur inside of a serializable transaction, and will logically occur
+  *		after any reads which saw rows which were destroyed by these
+  *		operations, so we do what we can to serialize properly under SSI.
+  *
+  * The relation passed in must be a heap relation for a table. Any predicate
+  * lock of any granularity on the heap will cause a rw-conflict in to this
+  * transaction.  Predicate locks on indexes do not matter because they only
+  * exist to guard against conflicting inserts into the index, and this is a
+  * mass *delete*.
+  *
+  * This should be done before altering the predicate locks because the
+  * transaction could be rolled back because of a conflict, in which case the
+  * lock changes are not needed.
+  */
+ void
+ CheckTableForSerializableConflictIn(const Relation relation)
+ {
+ 	HASH_SEQ_STATUS seqstat;
+ 	PREDICATELOCKTARGET *target;
+ 	Oid			dbId;
+ 	Oid			heapId;
+ 	int			i;
+ 
+ 	/*
+ 	 * Bail out quickly if there are no serializable transactions running.
+ 	 * It's safe to check this without taking locks because the caller is
+ 	 * holding an ACCESS EXCLUSIVE lock on the relation.  No new locks which
+ 	 * would matter here can be acquired while that is held.
+ 	 */
+ 	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+ 		return;
+ 
+ 	if (SkipSerialization(relation))
+ 		return;
+ 
+ 	Assert(relation->rd_index == NULL); /* not an index relation */
+ 
+ 	dbId = relation->rd_node.dbNode;
+ 	heapId = relation->rd_id;
+ 
+ 	LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
+ 	for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
+ 		LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED);
+ 	/*
+ 	 * Hold SerializableXactHashLock exclusively, not shared: FlagRWConflict
+ 	 * below is expected to modify shared conflict data (confirm vs. callee).
+ 	 */
+ 	LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
+ 
+ 	/* Scan through target list */
+ 	hash_seq_init(&seqstat, PredicateLockTargetHash);
+ 
+ 	while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
+ 	{
+ 		PREDICATELOCK *predlock;
+ 
+ 		/* Check whether this is a target which needs attention. */
+ 		if (GET_PREDICATELOCKTARGETTAG_RELATION(target->tag) != heapId)
+ 			continue;			/* wrong relation id */
+ 		if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
+ 			continue;			/* wrong database id */
+ 
+ 		/* Loop through locks for this target and flag conflicts. */
+ 		predlock = (PREDICATELOCK *)
+ 			SHMQueueNext(&(target->predicateLocks),
+ 						 &(target->predicateLocks),
+ 						 offsetof(PREDICATELOCK, targetLink));
+ 		while (predlock)
+ 		{
+ 			PREDICATELOCK *nextpredlock;
+ 
+ 			nextpredlock = (PREDICATELOCK *)
+ 				SHMQueueNext(&(target->predicateLocks),
+ 							 &(predlock->targetLink),
+ 							 offsetof(PREDICATELOCK, targetLink));
+ 
+ 			if (predlock->tag.myXact != MySerializableXact
+ 				&& !RWConflictExists(predlock->tag.myXact,
+ 									 (SERIALIZABLEXACT *) MySerializableXact))
+ 				FlagRWConflict(predlock->tag.myXact,
+ 							   (SERIALIZABLEXACT *) MySerializableXact);
+ 
+ 			predlock = nextpredlock;
+ 		}
+ 	}
+ 
+ 	/* Release locks in reverse order */
+ 	LWLockRelease(SerializableXactHashLock);
+ 	for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
+ 		LWLockRelease(FirstPredicateLockMgrLock + i);
+ 	LWLockRelease(SerializablePredicateLockListLock);
+ }
+ 
+ 
+ /*
   * Flag a rw-dependency between two serializable transactions.
   *
   * The caller is responsible for ensuring that we have a LW lock on
*** a/src/include/storage/predicate.h
--- b/src/include/storage/predicate.h
***************
*** 49,59 **** extern void PredicateLockPage(const Relation relation, const BlockNumber blkno);
--- 49,61 ----
  extern void PredicateLockTuple(const Relation relation, const HeapTuple tuple);
  extern void PredicateLockPageSplit(const Relation relation, const BlockNumber oldblkno, const BlockNumber newblkno);
  extern void PredicateLockPageCombine(const Relation relation, const BlockNumber oldblkno, const BlockNumber newblkno);
+ extern void TransferPredicateLocksToHeapRelation(const Relation relation);
  extern void ReleasePredicateLocks(const bool isCommit);
  
  /* conflict detection (may also trigger rollback) */
  extern void CheckForSerializableConflictOut(const bool valid, const Relation relation, const HeapTuple tuple, const Buffer buffer);
  extern void CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple, const Buffer buffer);
+ extern void CheckTableForSerializableConflictIn(const Relation relation);
  
  /* final rollback checking */
  extern void PreCommit_CheckForSerializationFailure(void);